hp_ablations_mistral_epoch4 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 3
1cfef4e verified
{"current_steps": 10, "total_steps": 1972, "loss": 0.7568, "lr": 5e-06, "epoch": 0.020253164556962026, "percentage": 0.51, "elapsed_time": "0:04:44", "remaining_time": "15:29:58"}
{"current_steps": 20, "total_steps": 1972, "loss": 0.6504, "lr": 5e-06, "epoch": 0.04050632911392405, "percentage": 1.01, "elapsed_time": "0:09:25", "remaining_time": "15:19:09"}
{"current_steps": 30, "total_steps": 1972, "loss": 0.6287, "lr": 5e-06, "epoch": 0.060759493670886074, "percentage": 1.52, "elapsed_time": "0:14:05", "remaining_time": "15:12:31"}
{"current_steps": 40, "total_steps": 1972, "loss": 0.6135, "lr": 5e-06, "epoch": 0.0810126582278481, "percentage": 2.03, "elapsed_time": "0:18:46", "remaining_time": "15:06:55"}
{"current_steps": 50, "total_steps": 1972, "loss": 0.6037, "lr": 5e-06, "epoch": 0.10126582278481013, "percentage": 2.54, "elapsed_time": "0:23:27", "remaining_time": "15:01:49"}
{"current_steps": 60, "total_steps": 1972, "loss": 0.5974, "lr": 5e-06, "epoch": 0.12151898734177215, "percentage": 3.04, "elapsed_time": "0:28:08", "remaining_time": "14:56:45"}
{"current_steps": 70, "total_steps": 1972, "loss": 0.5923, "lr": 5e-06, "epoch": 0.14177215189873418, "percentage": 3.55, "elapsed_time": "0:32:49", "remaining_time": "14:51:52"}
{"current_steps": 80, "total_steps": 1972, "loss": 0.5879, "lr": 5e-06, "epoch": 0.1620253164556962, "percentage": 4.06, "elapsed_time": "0:37:30", "remaining_time": "14:47:04"}
{"current_steps": 90, "total_steps": 1972, "loss": 0.5869, "lr": 5e-06, "epoch": 0.18227848101265823, "percentage": 4.56, "elapsed_time": "0:42:11", "remaining_time": "14:42:14"}
{"current_steps": 100, "total_steps": 1972, "loss": 0.5894, "lr": 5e-06, "epoch": 0.20253164556962025, "percentage": 5.07, "elapsed_time": "0:46:52", "remaining_time": "14:37:24"}
{"current_steps": 110, "total_steps": 1972, "loss": 0.5825, "lr": 5e-06, "epoch": 0.22278481012658227, "percentage": 5.58, "elapsed_time": "0:51:33", "remaining_time": "14:32:37"}
{"current_steps": 120, "total_steps": 1972, "loss": 0.5792, "lr": 5e-06, "epoch": 0.2430379746835443, "percentage": 6.09, "elapsed_time": "0:56:14", "remaining_time": "14:27:52"}
{"current_steps": 130, "total_steps": 1972, "loss": 0.5765, "lr": 5e-06, "epoch": 0.26329113924050634, "percentage": 6.59, "elapsed_time": "1:00:55", "remaining_time": "14:23:08"}
{"current_steps": 140, "total_steps": 1972, "loss": 0.5657, "lr": 5e-06, "epoch": 0.28354430379746837, "percentage": 7.1, "elapsed_time": "1:05:36", "remaining_time": "14:18:28"}
{"current_steps": 150, "total_steps": 1972, "loss": 0.576, "lr": 5e-06, "epoch": 0.3037974683544304, "percentage": 7.61, "elapsed_time": "1:10:17", "remaining_time": "14:13:45"}
{"current_steps": 160, "total_steps": 1972, "loss": 0.5748, "lr": 5e-06, "epoch": 0.3240506329113924, "percentage": 8.11, "elapsed_time": "1:14:58", "remaining_time": "14:09:02"}
{"current_steps": 170, "total_steps": 1972, "loss": 0.5762, "lr": 5e-06, "epoch": 0.34430379746835443, "percentage": 8.62, "elapsed_time": "1:19:39", "remaining_time": "14:04:20"}
{"current_steps": 180, "total_steps": 1972, "loss": 0.5726, "lr": 5e-06, "epoch": 0.36455696202531646, "percentage": 9.13, "elapsed_time": "1:24:20", "remaining_time": "13:59:37"}
{"current_steps": 190, "total_steps": 1972, "loss": 0.5724, "lr": 5e-06, "epoch": 0.3848101265822785, "percentage": 9.63, "elapsed_time": "1:29:01", "remaining_time": "13:54:55"}
{"current_steps": 200, "total_steps": 1972, "loss": 0.567, "lr": 5e-06, "epoch": 0.4050632911392405, "percentage": 10.14, "elapsed_time": "1:33:42", "remaining_time": "13:50:14"}
{"current_steps": 210, "total_steps": 1972, "loss": 0.5628, "lr": 5e-06, "epoch": 0.4253164556962025, "percentage": 10.65, "elapsed_time": "1:38:23", "remaining_time": "13:45:33"}
{"current_steps": 220, "total_steps": 1972, "loss": 0.5679, "lr": 5e-06, "epoch": 0.44556962025316454, "percentage": 11.16, "elapsed_time": "1:43:04", "remaining_time": "13:40:52"}
{"current_steps": 230, "total_steps": 1972, "loss": 0.5676, "lr": 5e-06, "epoch": 0.46582278481012657, "percentage": 11.66, "elapsed_time": "1:47:45", "remaining_time": "13:36:09"}
{"current_steps": 240, "total_steps": 1972, "loss": 0.5644, "lr": 5e-06, "epoch": 0.4860759493670886, "percentage": 12.17, "elapsed_time": "1:52:26", "remaining_time": "13:31:27"}
{"current_steps": 250, "total_steps": 1972, "loss": 0.5686, "lr": 5e-06, "epoch": 0.5063291139240507, "percentage": 12.68, "elapsed_time": "1:57:07", "remaining_time": "13:26:45"}
{"current_steps": 260, "total_steps": 1972, "loss": 0.5554, "lr": 5e-06, "epoch": 0.5265822784810127, "percentage": 13.18, "elapsed_time": "2:01:48", "remaining_time": "13:22:02"}
{"current_steps": 270, "total_steps": 1972, "loss": 0.5643, "lr": 5e-06, "epoch": 0.5468354430379747, "percentage": 13.69, "elapsed_time": "2:06:29", "remaining_time": "13:17:20"}
{"current_steps": 280, "total_steps": 1972, "loss": 0.56, "lr": 5e-06, "epoch": 0.5670886075949367, "percentage": 14.2, "elapsed_time": "2:11:10", "remaining_time": "13:12:39"}
{"current_steps": 290, "total_steps": 1972, "loss": 0.56, "lr": 5e-06, "epoch": 0.5873417721518988, "percentage": 14.71, "elapsed_time": "2:15:51", "remaining_time": "13:07:57"}
{"current_steps": 300, "total_steps": 1972, "loss": 0.5596, "lr": 5e-06, "epoch": 0.6075949367088608, "percentage": 15.21, "elapsed_time": "2:20:32", "remaining_time": "13:03:15"}
{"current_steps": 310, "total_steps": 1972, "loss": 0.5655, "lr": 5e-06, "epoch": 0.6278481012658228, "percentage": 15.72, "elapsed_time": "2:25:12", "remaining_time": "12:58:32"}
{"current_steps": 320, "total_steps": 1972, "loss": 0.5611, "lr": 5e-06, "epoch": 0.6481012658227848, "percentage": 16.23, "elapsed_time": "2:29:53", "remaining_time": "12:53:51"}
{"current_steps": 330, "total_steps": 1972, "loss": 0.5607, "lr": 5e-06, "epoch": 0.6683544303797468, "percentage": 16.73, "elapsed_time": "2:34:34", "remaining_time": "12:49:09"}
{"current_steps": 340, "total_steps": 1972, "loss": 0.5597, "lr": 5e-06, "epoch": 0.6886075949367089, "percentage": 17.24, "elapsed_time": "2:39:15", "remaining_time": "12:44:28"}
{"current_steps": 350, "total_steps": 1972, "loss": 0.5614, "lr": 5e-06, "epoch": 0.7088607594936709, "percentage": 17.75, "elapsed_time": "2:43:56", "remaining_time": "12:39:47"}
{"current_steps": 360, "total_steps": 1972, "loss": 0.5638, "lr": 5e-06, "epoch": 0.7291139240506329, "percentage": 18.26, "elapsed_time": "2:48:38", "remaining_time": "12:35:07"}
{"current_steps": 370, "total_steps": 1972, "loss": 0.5566, "lr": 5e-06, "epoch": 0.7493670886075949, "percentage": 18.76, "elapsed_time": "2:53:19", "remaining_time": "12:30:26"}
{"current_steps": 380, "total_steps": 1972, "loss": 0.5533, "lr": 5e-06, "epoch": 0.769620253164557, "percentage": 19.27, "elapsed_time": "2:58:00", "remaining_time": "12:25:46"}
{"current_steps": 390, "total_steps": 1972, "loss": 0.5559, "lr": 5e-06, "epoch": 0.789873417721519, "percentage": 19.78, "elapsed_time": "3:02:41", "remaining_time": "12:21:04"}
{"current_steps": 400, "total_steps": 1972, "loss": 0.5566, "lr": 5e-06, "epoch": 0.810126582278481, "percentage": 20.28, "elapsed_time": "3:07:22", "remaining_time": "12:16:23"}
{"current_steps": 410, "total_steps": 1972, "loss": 0.5514, "lr": 5e-06, "epoch": 0.830379746835443, "percentage": 20.79, "elapsed_time": "3:12:03", "remaining_time": "12:11:42"}
{"current_steps": 420, "total_steps": 1972, "loss": 0.5587, "lr": 5e-06, "epoch": 0.850632911392405, "percentage": 21.3, "elapsed_time": "3:16:45", "remaining_time": "12:07:02"}
{"current_steps": 430, "total_steps": 1972, "loss": 0.5521, "lr": 5e-06, "epoch": 0.8708860759493671, "percentage": 21.81, "elapsed_time": "3:21:25", "remaining_time": "12:02:20"}
{"current_steps": 440, "total_steps": 1972, "loss": 0.5573, "lr": 5e-06, "epoch": 0.8911392405063291, "percentage": 22.31, "elapsed_time": "3:26:06", "remaining_time": "11:57:39"}
{"current_steps": 450, "total_steps": 1972, "loss": 0.5488, "lr": 5e-06, "epoch": 0.9113924050632911, "percentage": 22.82, "elapsed_time": "3:30:47", "remaining_time": "11:52:57"}
{"current_steps": 460, "total_steps": 1972, "loss": 0.5532, "lr": 5e-06, "epoch": 0.9316455696202531, "percentage": 23.33, "elapsed_time": "3:35:28", "remaining_time": "11:48:15"}
{"current_steps": 470, "total_steps": 1972, "loss": 0.5539, "lr": 5e-06, "epoch": 0.9518987341772152, "percentage": 23.83, "elapsed_time": "3:40:09", "remaining_time": "11:43:33"}
{"current_steps": 480, "total_steps": 1972, "loss": 0.5515, "lr": 5e-06, "epoch": 0.9721518987341772, "percentage": 24.34, "elapsed_time": "3:44:50", "remaining_time": "11:38:52"}
{"current_steps": 490, "total_steps": 1972, "loss": 0.548, "lr": 5e-06, "epoch": 0.9924050632911392, "percentage": 24.85, "elapsed_time": "3:49:31", "remaining_time": "11:34:10"}
{"current_steps": 493, "total_steps": 1972, "eval_loss": 0.13772521913051605, "epoch": 0.9984810126582279, "percentage": 25.0, "elapsed_time": "3:55:22", "remaining_time": "11:46:07"}
{"current_steps": 500, "total_steps": 1972, "loss": 0.5056, "lr": 5e-06, "epoch": 1.0131645569620253, "percentage": 25.35, "elapsed_time": "3:59:17", "remaining_time": "11:44:27"}
{"current_steps": 510, "total_steps": 1972, "loss": 0.4717, "lr": 5e-06, "epoch": 1.0334177215189873, "percentage": 25.86, "elapsed_time": "4:03:58", "remaining_time": "11:39:23"}
{"current_steps": 520, "total_steps": 1972, "loss": 0.4683, "lr": 5e-06, "epoch": 1.0536708860759494, "percentage": 26.37, "elapsed_time": "4:08:39", "remaining_time": "11:34:20"}
{"current_steps": 530, "total_steps": 1972, "loss": 0.4682, "lr": 5e-06, "epoch": 1.0739240506329113, "percentage": 26.88, "elapsed_time": "4:13:21", "remaining_time": "11:29:18"}
{"current_steps": 540, "total_steps": 1972, "loss": 0.467, "lr": 5e-06, "epoch": 1.0941772151898734, "percentage": 27.38, "elapsed_time": "4:18:02", "remaining_time": "11:24:16"}
{"current_steps": 550, "total_steps": 1972, "loss": 0.4664, "lr": 5e-06, "epoch": 1.1144303797468353, "percentage": 27.89, "elapsed_time": "4:22:43", "remaining_time": "11:19:16"}
{"current_steps": 560, "total_steps": 1972, "loss": 0.4629, "lr": 5e-06, "epoch": 1.1346835443037975, "percentage": 28.4, "elapsed_time": "4:27:24", "remaining_time": "11:14:16"}
{"current_steps": 570, "total_steps": 1972, "loss": 0.4723, "lr": 5e-06, "epoch": 1.1549367088607596, "percentage": 28.9, "elapsed_time": "4:32:06", "remaining_time": "11:09:16"}
{"current_steps": 580, "total_steps": 1972, "loss": 0.4696, "lr": 5e-06, "epoch": 1.1751898734177215, "percentage": 29.41, "elapsed_time": "4:36:47", "remaining_time": "11:04:17"}
{"current_steps": 590, "total_steps": 1972, "loss": 0.4655, "lr": 5e-06, "epoch": 1.1954430379746834, "percentage": 29.92, "elapsed_time": "4:41:28", "remaining_time": "10:59:19"}
{"current_steps": 600, "total_steps": 1972, "loss": 0.4738, "lr": 5e-06, "epoch": 1.2156962025316456, "percentage": 30.43, "elapsed_time": "4:46:10", "remaining_time": "10:54:22"}
{"current_steps": 610, "total_steps": 1972, "loss": 0.4689, "lr": 5e-06, "epoch": 1.2359493670886077, "percentage": 30.93, "elapsed_time": "4:50:51", "remaining_time": "10:49:25"}
{"current_steps": 620, "total_steps": 1972, "loss": 0.4738, "lr": 5e-06, "epoch": 1.2562025316455696, "percentage": 31.44, "elapsed_time": "4:55:33", "remaining_time": "10:44:29"}
{"current_steps": 630, "total_steps": 1972, "loss": 0.4724, "lr": 5e-06, "epoch": 1.2764556962025315, "percentage": 31.95, "elapsed_time": "5:00:14", "remaining_time": "10:39:34"}
{"current_steps": 640, "total_steps": 1972, "loss": 0.4717, "lr": 5e-06, "epoch": 1.2967088607594937, "percentage": 32.45, "elapsed_time": "5:04:56", "remaining_time": "10:34:38"}
{"current_steps": 650, "total_steps": 1972, "loss": 0.4756, "lr": 5e-06, "epoch": 1.3169620253164558, "percentage": 32.96, "elapsed_time": "5:09:37", "remaining_time": "10:29:44"}
{"current_steps": 660, "total_steps": 1972, "loss": 0.4653, "lr": 5e-06, "epoch": 1.3372151898734177, "percentage": 33.47, "elapsed_time": "5:14:19", "remaining_time": "10:24:49"}
{"current_steps": 670, "total_steps": 1972, "loss": 0.4791, "lr": 5e-06, "epoch": 1.3574683544303796, "percentage": 33.98, "elapsed_time": "5:19:00", "remaining_time": "10:19:55"}
{"current_steps": 680, "total_steps": 1972, "loss": 0.4743, "lr": 5e-06, "epoch": 1.3777215189873417, "percentage": 34.48, "elapsed_time": "5:23:42", "remaining_time": "10:15:02"}
{"current_steps": 690, "total_steps": 1972, "loss": 0.4733, "lr": 5e-06, "epoch": 1.3979746835443039, "percentage": 34.99, "elapsed_time": "5:28:23", "remaining_time": "10:10:08"}
{"current_steps": 700, "total_steps": 1972, "loss": 0.4695, "lr": 5e-06, "epoch": 1.4182278481012658, "percentage": 35.5, "elapsed_time": "5:33:05", "remaining_time": "10:05:16"}
{"current_steps": 710, "total_steps": 1972, "loss": 0.4756, "lr": 5e-06, "epoch": 1.438481012658228, "percentage": 36.0, "elapsed_time": "5:37:46", "remaining_time": "10:00:23"}
{"current_steps": 720, "total_steps": 1972, "loss": 0.4757, "lr": 5e-06, "epoch": 1.4587341772151898, "percentage": 36.51, "elapsed_time": "5:42:28", "remaining_time": "9:55:30"}
{"current_steps": 730, "total_steps": 1972, "loss": 0.479, "lr": 5e-06, "epoch": 1.478987341772152, "percentage": 37.02, "elapsed_time": "5:47:09", "remaining_time": "9:50:38"}
{"current_steps": 740, "total_steps": 1972, "loss": 0.4737, "lr": 5e-06, "epoch": 1.4992405063291139, "percentage": 37.53, "elapsed_time": "5:51:50", "remaining_time": "9:45:45"}
{"current_steps": 750, "total_steps": 1972, "loss": 0.4764, "lr": 5e-06, "epoch": 1.5194936708860758, "percentage": 38.03, "elapsed_time": "5:56:31", "remaining_time": "9:40:53"}
{"current_steps": 760, "total_steps": 1972, "loss": 0.4765, "lr": 5e-06, "epoch": 1.539746835443038, "percentage": 38.54, "elapsed_time": "6:01:12", "remaining_time": "9:36:01"}
{"current_steps": 770, "total_steps": 1972, "loss": 0.4765, "lr": 5e-06, "epoch": 1.56, "percentage": 39.05, "elapsed_time": "6:05:53", "remaining_time": "9:31:10"}
{"current_steps": 780, "total_steps": 1972, "loss": 0.4762, "lr": 5e-06, "epoch": 1.5802531645569622, "percentage": 39.55, "elapsed_time": "6:10:35", "remaining_time": "9:26:20"}
{"current_steps": 790, "total_steps": 1972, "loss": 0.4797, "lr": 5e-06, "epoch": 1.600506329113924, "percentage": 40.06, "elapsed_time": "6:15:16", "remaining_time": "9:21:29"}
{"current_steps": 800, "total_steps": 1972, "loss": 0.4765, "lr": 5e-06, "epoch": 1.620759493670886, "percentage": 40.57, "elapsed_time": "6:19:57", "remaining_time": "9:16:38"}
{"current_steps": 810, "total_steps": 1972, "loss": 0.4685, "lr": 5e-06, "epoch": 1.6410126582278481, "percentage": 41.08, "elapsed_time": "6:24:38", "remaining_time": "9:11:47"}
{"current_steps": 820, "total_steps": 1972, "loss": 0.4771, "lr": 5e-06, "epoch": 1.6612658227848103, "percentage": 41.58, "elapsed_time": "6:29:19", "remaining_time": "9:06:57"}
{"current_steps": 830, "total_steps": 1972, "loss": 0.4721, "lr": 5e-06, "epoch": 1.6815189873417722, "percentage": 42.09, "elapsed_time": "6:34:00", "remaining_time": "9:02:07"}
{"current_steps": 840, "total_steps": 1972, "loss": 0.4731, "lr": 5e-06, "epoch": 1.701772151898734, "percentage": 42.6, "elapsed_time": "6:38:42", "remaining_time": "8:57:17"}
{"current_steps": 850, "total_steps": 1972, "loss": 0.4766, "lr": 5e-06, "epoch": 1.7220253164556962, "percentage": 43.1, "elapsed_time": "6:43:23", "remaining_time": "8:52:28"}
{"current_steps": 860, "total_steps": 1972, "loss": 0.4767, "lr": 5e-06, "epoch": 1.7422784810126584, "percentage": 43.61, "elapsed_time": "6:48:04", "remaining_time": "8:47:39"}
{"current_steps": 870, "total_steps": 1972, "loss": 0.4776, "lr": 5e-06, "epoch": 1.7625316455696203, "percentage": 44.12, "elapsed_time": "6:52:46", "remaining_time": "8:42:50"}
{"current_steps": 880, "total_steps": 1972, "loss": 0.4815, "lr": 5e-06, "epoch": 1.7827848101265822, "percentage": 44.62, "elapsed_time": "6:57:27", "remaining_time": "8:38:01"}
{"current_steps": 890, "total_steps": 1972, "loss": 0.4788, "lr": 5e-06, "epoch": 1.8030379746835443, "percentage": 45.13, "elapsed_time": "7:02:09", "remaining_time": "8:33:13"}
{"current_steps": 900, "total_steps": 1972, "loss": 0.477, "lr": 5e-06, "epoch": 1.8232911392405065, "percentage": 45.64, "elapsed_time": "7:06:50", "remaining_time": "8:28:24"}
{"current_steps": 910, "total_steps": 1972, "loss": 0.4789, "lr": 5e-06, "epoch": 1.8435443037974684, "percentage": 46.15, "elapsed_time": "7:11:31", "remaining_time": "8:23:36"}
{"current_steps": 920, "total_steps": 1972, "loss": 0.4791, "lr": 5e-06, "epoch": 1.8637974683544303, "percentage": 46.65, "elapsed_time": "7:16:13", "remaining_time": "8:18:48"}
{"current_steps": 930, "total_steps": 1972, "loss": 0.4746, "lr": 5e-06, "epoch": 1.8840506329113924, "percentage": 47.16, "elapsed_time": "7:20:54", "remaining_time": "8:14:00"}
{"current_steps": 940, "total_steps": 1972, "loss": 0.4774, "lr": 5e-06, "epoch": 1.9043037974683545, "percentage": 47.67, "elapsed_time": "7:25:34", "remaining_time": "8:09:11"}
{"current_steps": 950, "total_steps": 1972, "loss": 0.4799, "lr": 5e-06, "epoch": 1.9245569620253165, "percentage": 48.17, "elapsed_time": "7:30:15", "remaining_time": "8:04:23"}
{"current_steps": 960, "total_steps": 1972, "loss": 0.4834, "lr": 5e-06, "epoch": 1.9448101265822784, "percentage": 48.68, "elapsed_time": "7:34:56", "remaining_time": "7:59:35"}
{"current_steps": 970, "total_steps": 1972, "loss": 0.481, "lr": 5e-06, "epoch": 1.9650632911392405, "percentage": 49.19, "elapsed_time": "7:39:37", "remaining_time": "7:54:47"}
{"current_steps": 980, "total_steps": 1972, "loss": 0.4823, "lr": 5e-06, "epoch": 1.9853164556962026, "percentage": 49.7, "elapsed_time": "7:44:18", "remaining_time": "7:49:59"}
{"current_steps": 987, "total_steps": 1972, "eval_loss": 0.13791824877262115, "epoch": 1.999493670886076, "percentage": 50.05, "elapsed_time": "7:51:45", "remaining_time": "7:50:48"}
{"current_steps": 990, "total_steps": 1972, "loss": 0.4515, "lr": 5e-06, "epoch": 2.0060759493670886, "percentage": 50.2, "elapsed_time": "7:54:03", "remaining_time": "7:50:13"}
{"current_steps": 1000, "total_steps": 1972, "loss": 0.3891, "lr": 5e-06, "epoch": 2.0263291139240507, "percentage": 50.71, "elapsed_time": "7:58:44", "remaining_time": "7:45:20"}
{"current_steps": 1010, "total_steps": 1972, "loss": 0.3814, "lr": 5e-06, "epoch": 2.046582278481013, "percentage": 51.22, "elapsed_time": "8:03:25", "remaining_time": "7:40:27"}
{"current_steps": 1020, "total_steps": 1972, "loss": 0.3794, "lr": 5e-06, "epoch": 2.0668354430379745, "percentage": 51.72, "elapsed_time": "8:08:07", "remaining_time": "7:35:35"}
{"current_steps": 1030, "total_steps": 1972, "loss": 0.3793, "lr": 5e-06, "epoch": 2.0870886075949366, "percentage": 52.23, "elapsed_time": "8:12:49", "remaining_time": "7:30:43"}
{"current_steps": 1040, "total_steps": 1972, "loss": 0.3758, "lr": 5e-06, "epoch": 2.1073417721518988, "percentage": 52.74, "elapsed_time": "8:17:31", "remaining_time": "7:25:51"}
{"current_steps": 1050, "total_steps": 1972, "loss": 0.3868, "lr": 5e-06, "epoch": 2.127594936708861, "percentage": 53.25, "elapsed_time": "8:22:12", "remaining_time": "7:20:59"}
{"current_steps": 1060, "total_steps": 1972, "loss": 0.3827, "lr": 5e-06, "epoch": 2.1478481012658226, "percentage": 53.75, "elapsed_time": "8:26:54", "remaining_time": "7:16:07"}
{"current_steps": 1070, "total_steps": 1972, "loss": 0.3852, "lr": 5e-06, "epoch": 2.1681012658227847, "percentage": 54.26, "elapsed_time": "8:31:36", "remaining_time": "7:11:16"}
{"current_steps": 1080, "total_steps": 1972, "loss": 0.3841, "lr": 5e-06, "epoch": 2.188354430379747, "percentage": 54.77, "elapsed_time": "8:36:17", "remaining_time": "7:06:25"}
{"current_steps": 1090, "total_steps": 1972, "loss": 0.3818, "lr": 5e-06, "epoch": 2.208607594936709, "percentage": 55.27, "elapsed_time": "8:40:59", "remaining_time": "7:01:34"}
{"current_steps": 1100, "total_steps": 1972, "loss": 0.3909, "lr": 5e-06, "epoch": 2.2288607594936707, "percentage": 55.78, "elapsed_time": "8:45:40", "remaining_time": "6:56:43"}
{"current_steps": 1110, "total_steps": 1972, "loss": 0.3884, "lr": 5e-06, "epoch": 2.249113924050633, "percentage": 56.29, "elapsed_time": "8:50:22", "remaining_time": "6:51:52"}
{"current_steps": 1120, "total_steps": 1972, "loss": 0.387, "lr": 5e-06, "epoch": 2.269367088607595, "percentage": 56.8, "elapsed_time": "8:55:03", "remaining_time": "6:47:01"}
{"current_steps": 1130, "total_steps": 1972, "loss": 0.3894, "lr": 5e-06, "epoch": 2.289620253164557, "percentage": 57.3, "elapsed_time": "8:59:45", "remaining_time": "6:42:11"}
{"current_steps": 1140, "total_steps": 1972, "loss": 0.3842, "lr": 5e-06, "epoch": 2.309873417721519, "percentage": 57.81, "elapsed_time": "9:04:26", "remaining_time": "6:37:20"}
{"current_steps": 1150, "total_steps": 1972, "loss": 0.3908, "lr": 5e-06, "epoch": 2.330126582278481, "percentage": 58.32, "elapsed_time": "9:09:07", "remaining_time": "6:32:30"}
{"current_steps": 1160, "total_steps": 1972, "loss": 0.3903, "lr": 5e-06, "epoch": 2.350379746835443, "percentage": 58.82, "elapsed_time": "9:13:49", "remaining_time": "6:27:40"}
{"current_steps": 1170, "total_steps": 1972, "loss": 0.3915, "lr": 5e-06, "epoch": 2.370632911392405, "percentage": 59.33, "elapsed_time": "9:18:31", "remaining_time": "6:22:50"}
{"current_steps": 1180, "total_steps": 1972, "loss": 0.3924, "lr": 5e-06, "epoch": 2.390886075949367, "percentage": 59.84, "elapsed_time": "9:23:12", "remaining_time": "6:18:00"}
{"current_steps": 1190, "total_steps": 1972, "loss": 0.3905, "lr": 5e-06, "epoch": 2.411139240506329, "percentage": 60.34, "elapsed_time": "9:27:53", "remaining_time": "6:13:11"}
{"current_steps": 1200, "total_steps": 1972, "loss": 0.3899, "lr": 5e-06, "epoch": 2.431392405063291, "percentage": 60.85, "elapsed_time": "9:32:35", "remaining_time": "6:08:21"}
{"current_steps": 1210, "total_steps": 1972, "loss": 0.3878, "lr": 5e-06, "epoch": 2.4516455696202533, "percentage": 61.36, "elapsed_time": "9:37:16", "remaining_time": "6:03:32"}
{"current_steps": 1220, "total_steps": 1972, "loss": 0.3907, "lr": 5e-06, "epoch": 2.4718987341772154, "percentage": 61.87, "elapsed_time": "9:41:58", "remaining_time": "5:58:43"}
{"current_steps": 1230, "total_steps": 1972, "loss": 0.3937, "lr": 5e-06, "epoch": 2.492151898734177, "percentage": 62.37, "elapsed_time": "9:46:39", "remaining_time": "5:53:54"}
{"current_steps": 1240, "total_steps": 1972, "loss": 0.393, "lr": 5e-06, "epoch": 2.512405063291139, "percentage": 62.88, "elapsed_time": "9:51:21", "remaining_time": "5:49:05"}
{"current_steps": 1250, "total_steps": 1972, "loss": 0.3921, "lr": 5e-06, "epoch": 2.5326582278481014, "percentage": 63.39, "elapsed_time": "9:56:02", "remaining_time": "5:44:16"}
{"current_steps": 1260, "total_steps": 1972, "loss": 0.386, "lr": 5e-06, "epoch": 2.552911392405063, "percentage": 63.89, "elapsed_time": "10:00:44", "remaining_time": "5:39:27"}
{"current_steps": 1270, "total_steps": 1972, "loss": 0.3929, "lr": 5e-06, "epoch": 2.573164556962025, "percentage": 64.4, "elapsed_time": "10:05:26", "remaining_time": "5:34:39"}
{"current_steps": 1280, "total_steps": 1972, "loss": 0.3956, "lr": 5e-06, "epoch": 2.5934177215189873, "percentage": 64.91, "elapsed_time": "10:10:07", "remaining_time": "5:29:51"}
{"current_steps": 1290, "total_steps": 1972, "loss": 0.3985, "lr": 5e-06, "epoch": 2.6136708860759494, "percentage": 65.42, "elapsed_time": "10:14:49", "remaining_time": "5:25:02"}
{"current_steps": 1300, "total_steps": 1972, "loss": 0.3949, "lr": 5e-06, "epoch": 2.6339240506329116, "percentage": 65.92, "elapsed_time": "10:19:31", "remaining_time": "5:20:14"}
{"current_steps": 1310, "total_steps": 1972, "loss": 0.3949, "lr": 5e-06, "epoch": 2.6541772151898733, "percentage": 66.43, "elapsed_time": "10:24:12", "remaining_time": "5:15:26"}
{"current_steps": 1320, "total_steps": 1972, "loss": 0.3972, "lr": 5e-06, "epoch": 2.6744303797468354, "percentage": 66.94, "elapsed_time": "10:28:54", "remaining_time": "5:10:38"}
{"current_steps": 1330, "total_steps": 1972, "loss": 0.3975, "lr": 5e-06, "epoch": 2.6946835443037975, "percentage": 67.44, "elapsed_time": "10:33:36", "remaining_time": "5:05:50"}
{"current_steps": 1340, "total_steps": 1972, "loss": 0.3998, "lr": 5e-06, "epoch": 2.714936708860759, "percentage": 67.95, "elapsed_time": "10:38:17", "remaining_time": "5:01:02"}
{"current_steps": 1350, "total_steps": 1972, "loss": 0.3972, "lr": 5e-06, "epoch": 2.7351898734177214, "percentage": 68.46, "elapsed_time": "10:42:58", "remaining_time": "4:56:14"}
{"current_steps": 1360, "total_steps": 1972, "loss": 0.3932, "lr": 5e-06, "epoch": 2.7554430379746835, "percentage": 68.97, "elapsed_time": "10:47:40", "remaining_time": "4:51:27"}
{"current_steps": 1370, "total_steps": 1972, "loss": 0.3999, "lr": 5e-06, "epoch": 2.7756962025316456, "percentage": 69.47, "elapsed_time": "10:52:22", "remaining_time": "4:46:39"}
{"current_steps": 1380, "total_steps": 1972, "loss": 0.3974, "lr": 5e-06, "epoch": 2.7959493670886078, "percentage": 69.98, "elapsed_time": "10:57:03", "remaining_time": "4:41:52"}
{"current_steps": 1390, "total_steps": 1972, "loss": 0.4004, "lr": 5e-06, "epoch": 2.81620253164557, "percentage": 70.49, "elapsed_time": "11:01:44", "remaining_time": "4:37:04"}
{"current_steps": 1400, "total_steps": 1972, "loss": 0.3945, "lr": 5e-06, "epoch": 2.8364556962025316, "percentage": 70.99, "elapsed_time": "11:06:26", "remaining_time": "4:32:17"}
{"current_steps": 1410, "total_steps": 1972, "loss": 0.4014, "lr": 5e-06, "epoch": 2.8567088607594937, "percentage": 71.5, "elapsed_time": "11:11:07", "remaining_time": "4:27:30"}
{"current_steps": 1420, "total_steps": 1972, "loss": 0.3987, "lr": 5e-06, "epoch": 2.876962025316456, "percentage": 72.01, "elapsed_time": "11:15:49", "remaining_time": "4:22:42"}
{"current_steps": 1430, "total_steps": 1972, "loss": 0.3983, "lr": 5e-06, "epoch": 2.8972151898734175, "percentage": 72.52, "elapsed_time": "11:20:30", "remaining_time": "4:17:55"}
{"current_steps": 1440, "total_steps": 1972, "loss": 0.4016, "lr": 5e-06, "epoch": 2.9174683544303797, "percentage": 73.02, "elapsed_time": "11:25:11", "remaining_time": "4:13:08"}
{"current_steps": 1450, "total_steps": 1972, "loss": 0.4105, "lr": 5e-06, "epoch": 2.937721518987342, "percentage": 73.53, "elapsed_time": "11:29:53", "remaining_time": "4:08:21"}
{"current_steps": 1460, "total_steps": 1972, "loss": 0.3992, "lr": 5e-06, "epoch": 2.957974683544304, "percentage": 74.04, "elapsed_time": "11:34:34", "remaining_time": "4:03:34"}
{"current_steps": 1470, "total_steps": 1972, "loss": 0.403, "lr": 5e-06, "epoch": 2.978227848101266, "percentage": 74.54, "elapsed_time": "11:39:15", "remaining_time": "3:58:47"}
{"current_steps": 1480, "total_steps": 1972, "loss": 0.4008, "lr": 5e-06, "epoch": 2.9984810126582278, "percentage": 75.05, "elapsed_time": "11:43:56", "remaining_time": "3:54:00"}
{"current_steps": 1480, "total_steps": 1972, "eval_loss": 0.1451626569032669, "epoch": 2.9984810126582278, "percentage": 75.05, "elapsed_time": "11:48:23", "remaining_time": "3:55:29"}
{"current_steps": 1490, "total_steps": 1972, "loss": 0.3123, "lr": 5e-06, "epoch": 3.019240506329114, "percentage": 75.56, "elapsed_time": "11:53:44", "remaining_time": "3:50:53"}
{"current_steps": 1500, "total_steps": 1972, "loss": 0.2924, "lr": 5e-06, "epoch": 3.039493670886076, "percentage": 76.06, "elapsed_time": "11:58:25", "remaining_time": "3:46:04"}
{"current_steps": 1510, "total_steps": 1972, "loss": 0.2942, "lr": 5e-06, "epoch": 3.059746835443038, "percentage": 76.57, "elapsed_time": "12:03:07", "remaining_time": "3:41:14"}
{"current_steps": 1520, "total_steps": 1972, "loss": 0.2933, "lr": 5e-06, "epoch": 3.08, "percentage": 77.08, "elapsed_time": "12:07:48", "remaining_time": "3:36:25"}
{"current_steps": 1530, "total_steps": 1972, "loss": 0.2944, "lr": 5e-06, "epoch": 3.100253164556962, "percentage": 77.59, "elapsed_time": "12:12:30", "remaining_time": "3:31:36"}
{"current_steps": 1540, "total_steps": 1972, "loss": 0.292, "lr": 5e-06, "epoch": 3.120506329113924, "percentage": 78.09, "elapsed_time": "12:17:12", "remaining_time": "3:26:48"}
{"current_steps": 1550, "total_steps": 1972, "loss": 0.29, "lr": 5e-06, "epoch": 3.140759493670886, "percentage": 78.6, "elapsed_time": "12:21:53", "remaining_time": "3:21:59"}
{"current_steps": 1560, "total_steps": 1972, "loss": 0.2964, "lr": 5e-06, "epoch": 3.161012658227848, "percentage": 79.11, "elapsed_time": "12:26:35", "remaining_time": "3:17:10"}
{"current_steps": 1570, "total_steps": 1972, "loss": 0.2967, "lr": 5e-06, "epoch": 3.1812658227848103, "percentage": 79.61, "elapsed_time": "12:31:16", "remaining_time": "3:12:21"}
{"current_steps": 1580, "total_steps": 1972, "loss": 0.2999, "lr": 5e-06, "epoch": 3.201518987341772, "percentage": 80.12, "elapsed_time": "12:35:57", "remaining_time": "3:07:33"}
{"current_steps": 1590, "total_steps": 1972, "loss": 0.3003, "lr": 5e-06, "epoch": 3.221772151898734, "percentage": 80.63, "elapsed_time": "12:40:39", "remaining_time": "3:02:44"}
{"current_steps": 1600, "total_steps": 1972, "loss": 0.3007, "lr": 5e-06, "epoch": 3.2420253164556962, "percentage": 81.14, "elapsed_time": "12:45:20", "remaining_time": "2:57:56"}
{"current_steps": 1610, "total_steps": 1972, "loss": 0.2992, "lr": 5e-06, "epoch": 3.2622784810126584, "percentage": 81.64, "elapsed_time": "12:50:02", "remaining_time": "2:53:08"}
{"current_steps": 1620, "total_steps": 1972, "loss": 0.3016, "lr": 5e-06, "epoch": 3.28253164556962, "percentage": 82.15, "elapsed_time": "12:54:43", "remaining_time": "2:48:20"}
{"current_steps": 1630, "total_steps": 1972, "loss": 0.3046, "lr": 5e-06, "epoch": 3.302784810126582, "percentage": 82.66, "elapsed_time": "12:59:24", "remaining_time": "2:43:31"}
{"current_steps": 1640, "total_steps": 1972, "loss": 0.3014, "lr": 5e-06, "epoch": 3.3230379746835443, "percentage": 83.16, "elapsed_time": "13:04:05", "remaining_time": "2:38:43"}
{"current_steps": 1650, "total_steps": 1972, "loss": 0.3031, "lr": 5e-06, "epoch": 3.3432911392405065, "percentage": 83.67, "elapsed_time": "13:08:47", "remaining_time": "2:33:56"}
{"current_steps": 1660, "total_steps": 1972, "loss": 0.3052, "lr": 5e-06, "epoch": 3.363544303797468, "percentage": 84.18, "elapsed_time": "13:13:28", "remaining_time": "2:29:08"}
{"current_steps": 1670, "total_steps": 1972, "loss": 0.2994, "lr": 5e-06, "epoch": 3.3837974683544303, "percentage": 84.69, "elapsed_time": "13:18:10", "remaining_time": "2:24:20"}
{"current_steps": 1680, "total_steps": 1972, "loss": 0.2987, "lr": 5e-06, "epoch": 3.4040506329113924, "percentage": 85.19, "elapsed_time": "13:22:52", "remaining_time": "2:19:32"}
{"current_steps": 1690, "total_steps": 1972, "loss": 0.3028, "lr": 5e-06, "epoch": 3.4243037974683546, "percentage": 85.7, "elapsed_time": "13:27:33", "remaining_time": "2:14:45"}
{"current_steps": 1700, "total_steps": 1972, "loss": 0.3012, "lr": 5e-06, "epoch": 3.4445569620253167, "percentage": 86.21, "elapsed_time": "13:32:15", "remaining_time": "2:09:57"}
{"current_steps": 1710, "total_steps": 1972, "loss": 0.3033, "lr": 5e-06, "epoch": 3.4648101265822784, "percentage": 86.71, "elapsed_time": "13:36:56", "remaining_time": "2:05:10"}
{"current_steps": 1720, "total_steps": 1972, "loss": 0.3078, "lr": 5e-06, "epoch": 3.4850632911392405, "percentage": 87.22, "elapsed_time": "13:41:37", "remaining_time": "2:00:22"}
{"current_steps": 1730, "total_steps": 1972, "loss": 0.3076, "lr": 5e-06, "epoch": 3.5053164556962026, "percentage": 87.73, "elapsed_time": "13:46:19", "remaining_time": "1:55:35"}
{"current_steps": 1740, "total_steps": 1972, "loss": 0.3058, "lr": 5e-06, "epoch": 3.5255696202531643, "percentage": 88.24, "elapsed_time": "13:51:00", "remaining_time": "1:50:48"}
{"current_steps": 1750, "total_steps": 1972, "loss": 0.3092, "lr": 5e-06, "epoch": 3.5458227848101265, "percentage": 88.74, "elapsed_time": "13:55:42", "remaining_time": "1:46:00"}
{"current_steps": 1760, "total_steps": 1972, "loss": 0.3031, "lr": 5e-06, "epoch": 3.5660759493670886, "percentage": 89.25, "elapsed_time": "14:00:24", "remaining_time": "1:41:13"}
{"current_steps": 1770, "total_steps": 1972, "loss": 0.3097, "lr": 5e-06, "epoch": 3.5863291139240507, "percentage": 89.76, "elapsed_time": "14:05:05", "remaining_time": "1:36:26"}
{"current_steps": 1780, "total_steps": 1972, "loss": 0.3076, "lr": 5e-06, "epoch": 3.606582278481013, "percentage": 90.26, "elapsed_time": "14:09:47", "remaining_time": "1:31:39"}
{"current_steps": 1790, "total_steps": 1972, "loss": 0.3054, "lr": 5e-06, "epoch": 3.6268354430379746, "percentage": 90.77, "elapsed_time": "14:14:29", "remaining_time": "1:26:52"}
{"current_steps": 1800, "total_steps": 1972, "loss": 0.3089, "lr": 5e-06, "epoch": 3.6470886075949367, "percentage": 91.28, "elapsed_time": "14:19:10", "remaining_time": "1:22:05"}
{"current_steps": 1810, "total_steps": 1972, "loss": 0.3141, "lr": 5e-06, "epoch": 3.667341772151899, "percentage": 91.78, "elapsed_time": "14:23:52", "remaining_time": "1:17:19"}
{"current_steps": 1820, "total_steps": 1972, "loss": 0.3093, "lr": 5e-06, "epoch": 3.6875949367088605, "percentage": 92.29, "elapsed_time": "14:28:33", "remaining_time": "1:12:32"}
{"current_steps": 1830, "total_steps": 1972, "loss": 0.3105, "lr": 5e-06, "epoch": 3.7078481012658226, "percentage": 92.8, "elapsed_time": "14:33:14", "remaining_time": "1:07:45"}
{"current_steps": 1840, "total_steps": 1972, "loss": 0.3124, "lr": 5e-06, "epoch": 3.728101265822785, "percentage": 93.31, "elapsed_time": "14:37:56", "remaining_time": "1:02:58"}
{"current_steps": 1850, "total_steps": 1972, "loss": 0.3099, "lr": 5e-06, "epoch": 3.748354430379747, "percentage": 93.81, "elapsed_time": "14:42:38", "remaining_time": "0:58:12"}
{"current_steps": 1860, "total_steps": 1972, "loss": 0.3173, "lr": 5e-06, "epoch": 3.768607594936709, "percentage": 94.32, "elapsed_time": "14:47:19", "remaining_time": "0:53:25"}
{"current_steps": 1870, "total_steps": 1972, "loss": 0.3145, "lr": 5e-06, "epoch": 3.7888607594936707, "percentage": 94.83, "elapsed_time": "14:52:01", "remaining_time": "0:48:39"}
{"current_steps": 1880, "total_steps": 1972, "loss": 0.3154, "lr": 5e-06, "epoch": 3.809113924050633, "percentage": 95.33, "elapsed_time": "14:56:42", "remaining_time": "0:43:52"}
{"current_steps": 1890, "total_steps": 1972, "loss": 0.315, "lr": 5e-06, "epoch": 3.829367088607595, "percentage": 95.84, "elapsed_time": "15:01:24", "remaining_time": "0:39:06"}
{"current_steps": 1900, "total_steps": 1972, "loss": 0.3157, "lr": 5e-06, "epoch": 3.8496202531645567, "percentage": 96.35, "elapsed_time": "15:06:05", "remaining_time": "0:34:20"}
{"current_steps": 1910, "total_steps": 1972, "loss": 0.3117, "lr": 5e-06, "epoch": 3.869873417721519, "percentage": 96.86, "elapsed_time": "15:10:47", "remaining_time": "0:29:33"}
{"current_steps": 1920, "total_steps": 1972, "loss": 0.3172, "lr": 5e-06, "epoch": 3.890126582278481, "percentage": 97.36, "elapsed_time": "15:15:28", "remaining_time": "0:24:47"}
{"current_steps": 1930, "total_steps": 1972, "loss": 0.3147, "lr": 5e-06, "epoch": 3.910379746835443, "percentage": 97.87, "elapsed_time": "15:20:10", "remaining_time": "0:20:01"}
{"current_steps": 1940, "total_steps": 1972, "loss": 0.312, "lr": 5e-06, "epoch": 3.9306329113924052, "percentage": 98.38, "elapsed_time": "15:24:51", "remaining_time": "0:15:15"}
{"current_steps": 1950, "total_steps": 1972, "loss": 0.3164, "lr": 5e-06, "epoch": 3.9508860759493674, "percentage": 98.88, "elapsed_time": "15:29:33", "remaining_time": "0:10:29"}
{"current_steps": 1960, "total_steps": 1972, "loss": 0.3175, "lr": 5e-06, "epoch": 3.971139240506329, "percentage": 99.39, "elapsed_time": "15:34:15", "remaining_time": "0:05:43"}
{"current_steps": 1970, "total_steps": 1972, "loss": 0.3172, "lr": 5e-06, "epoch": 3.991392405063291, "percentage": 99.9, "elapsed_time": "15:38:56", "remaining_time": "0:00:57"}
{"current_steps": 1972, "total_steps": 1972, "eval_loss": 0.1617216169834137, "epoch": 3.9954430379746837, "percentage": 100.0, "elapsed_time": "15:45:01", "remaining_time": "0:00:00"}
{"current_steps": 1972, "total_steps": 1972, "epoch": 3.9954430379746837, "percentage": 100.0, "elapsed_time": "15:46:02", "remaining_time": "0:00:00"}