diff --git "a/run-2024-10-25T01:17:54+00:00.log" "b/run-2024-10-25T01:17:54+00:00.log" --- "a/run-2024-10-25T01:17:54+00:00.log" +++ "b/run-2024-10-25T01:17:54+00:00.log" @@ -4302,4 +4302,1069 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' self.pid = os.fork() 25%|██▌ | 15479/61904 [8:04:32<7294:49:21, 565.67s/it] 25%|██▌ | 15480/61904 [8:04:33<5112:10:07, 396.43s/it] {'loss': 2.7449, 'learning_rate': 1.75236613509659e-07, 'epoch': 4.0} 25%|██▌ | 15480/61904 [8:04:33<5112:10:07, 396.43s/it] 25%|██▌ | 15481/61904 [8:04:35<3583:41:38, 277.91s/it] 25%|██▌ | 15482/61904 [8:04:36<2513:58:58, 194.96s/it] 25%|██▌ | 15483/61904 [8:04:38<1766:08:24, 136.97s/it] 25%|██▌ | 15484/61904 [8:04:39<1242:39:25, 96.37s/it] 25%|██▌ | 15485/61904 [8:04:41<874:35:36, 67.83s/it] 25%|██▌ | 15486/61904 [8:04:42<617:24:03, 47.88s/it] 25%|██▌ | 15487/61904 [8:04:43<437:38:39, 33.94s/it] 25%|██▌ | 15488/61904 [8:04:45<311:45:10, 24.18s/it] 25%|██▌ | 15489/61904 [8:04:46<223:45:15, 17.35s/it] 25%|██▌ | 15490/61904 [8:04:48<161:55:21, 12.56s/it] 25%|██▌ | 15491/61904 [8:04:49<118:26:20, 9.19s/it] 25%|██▌ | 15492/61904 [8:04:50<88:31:35, 6.87s/it] 25%|██▌ | 15493/61904 [8:04:52<67:05:47, 5.20s/it] 25%|██▌ | 15494/61904 [8:04:53<52:12:21, 4.05s/it] 25%|██▌ | 15495/61904 [8:04:55<42:32:50, 3.30s/it] 25%|██▌ | 15496/61904 [8:04:56<35:14:31, 2.73s/it] 25%|██▌ | 15497/61904 [8:04:58<30:37:37, 2.38s/it] 25%|██▌ | 15498/61904 [8:04:59<26:36:55, 2.06s/it] 25%|██▌ | 15499/61904 [8:05:00<23:28:07, 1.82s/it] 25%|██▌ | 15500/61904 [8:05:02<22:19:47, 1.73s/it] {'loss': 2.7855, 'learning_rate': 1.752042007001167e-07, 'epoch': 4.01} - 25%|██▌ | 15500/61904 [8:05:02<22:19:47, 1.73s/it] 25%|██▌ | 15501/61904 [8:05:03<21:21:07, 1.66s/it] 25%|██▌ | 15502/61904 [8:05:04<20:07:57, 1.56s/it] 25%|██▌ | 15503/61904 [8:05:06<19:35:31, 1.52s/it] 25%|██▌ | 15504/61904 [8:05:07<19:10:55, 1.49s/it] 25%|██▌ | 15505/61904 [8:05:09<19:07:14, 1.48s/it] 25%|██▌ | 15506/61904 [8:05:10<18:49:05, 1.46s/it] 25%|██▌ | 15507/61904 [8:05:12<18:18:22, 1.42s/it] 25%|██▌ | 15508/61904 [8:05:13<17:48:31, 1.38s/it] 25%|██▌ | 15509/61904 [8:05:14<18:07:42, 1.41s/it] 25%|██▌ | 15510/61904 [8:05:16<17:52:06, 1.39s/it] 25%|██▌ | 15511/61904 [8:05:17<18:04:05, 1.40s/it] 25%|██▌ | 15512/61904 [8:05:18<17:44:27, 1.38s/it] 25%|██▌ | 15513/61904 [8:05:20<17:35:33, 1.37s/it] 25%|██▌ | 15514/61904 [8:05:21<17:53:45, 1.39s/it] 25%|██▌ | 15515/61904 [8:05:22<17:16:34, 1.34s/it] 25%|██▌ | 15516/61904 [8:05:24<17:45:27, 1.38s/it] 25%|██▌ | 15517/61904 [8:05:25<17:25:21, 1.35s/it] \ No newline at end of file + 25%|██▌ | 15500/61904 [8:05:02<22:19:47, 1.73s/it] 25%|██▌ | 15501/61904 [8:05:03<21:21:07, 1.66s/it] 25%|██▌ | 15502/61904 [8:05:04<20:07:57, 1.56s/it] 25%|██▌ | 15503/61904 [8:05:06<19:35:31, 1.52s/it] 25%|██▌ | 15504/61904 [8:05:07<19:10:55, 1.49s/it] 25%|██▌ | 15505/61904 [8:05:09<19:07:14, 1.48s/it] 25%|██▌ | 15506/61904 [8:05:10<18:49:05, 1.46s/it] 25%|██▌ | 15507/61904 [8:05:12<18:18:22, 1.42s/it] 25%|██▌ | 15508/61904 [8:05:13<17:48:31, 1.38s/it] 25%|██▌ | 15509/61904 [8:05:14<18:07:42, 1.41s/it] 25%|██▌ | 15510/61904 [8:05:16<17:52:06, 1.39s/it] 25%|██▌ | 15511/61904 [8:05:17<18:04:05, 1.40s/it] 25%|██▌ | 15512/61904 [8:05:18<17:44:27, 1.38s/it] 25%|██▌ | 15513/61904 [8:05:20<17:35:33, 1.37s/it] 25%|██▌ | 15514/61904 [8:05:21<17:53:45, 1.39s/it] 25%|██▌ | 15515/61904 [8:05:22<17:16:34, 1.34s/it] 25%|██▌ | 15516/61904 [8:05:24<17:45:27, 1.38s/it] 25%|██▌ | 15517/61904 [8:05:25<17:25:21, 1.35s/it] 25%|██▌ | 15518/61904 [8:05:27<17:40:34, 1.37s/it] 25%|██▌ | 15519/61904 [8:05:28<17:47:49, 1.38s/it] 25%|██▌ | 15520/61904 [8:05:30<19:10:55, 1.49s/it] {'loss': 2.7415, 'learning_rate': 1.7517178789057433e-07, 'epoch': 4.01} + 25%|██▌ | 15520/61904 [8:05:30<19:10:55, 1.49s/it] 25%|██▌ | 15521/61904 [8:05:31<18:53:46, 1.47s/it] 25%|██▌ | 15522/61904 [8:05:32<18:26:11, 1.43s/it] 25%|██▌ | 15523/61904 [8:05:34<18:09:28, 1.41s/it] 25%|██▌ | 15524/61904 [8:05:35<18:05:25, 1.40s/it] 25%|██▌ | 15525/61904 [8:05:37<18:08:10, 1.41s/it] 25%|██▌ | 15526/61904 [8:05:38<18:07:36, 1.41s/it] 25%|██▌ | 15527/61904 [8:05:39<17:48:18, 1.38s/it] 25%|██▌ | 15528/61904 [8:05:41<17:50:11, 1.38s/it] 25%|██▌ | 15529/61904 [8:05:42<17:32:54, 1.36s/it] 25%|██▌ | 15530/61904 [8:05:43<17:18:13, 1.34s/it] 25%|██▌ | 15531/61904 [8:05:45<17:19:30, 1.34s/it] 25%|██▌ | 15532/61904 [8:05:46<17:14:03, 1.34s/it] 25%|██▌ | 15533/61904 [8:05:47<17:18:56, 1.34s/it] 25%|██▌ | 15534/61904 [8:05:49<17:51:22, 1.39s/it] 25%|██▌ | 15535/61904 [8:05:50<17:57:43, 1.39s/it] 25%|██▌ | 15536/61904 [8:05:52<17:52:54, 1.39s/it] 25%|██▌ | 15537/61904 [8:05:53<17:49:15, 1.38s/it] 25%|██▌ | 15538/61904 [8:05:54<17:33:38, 1.36s/it] 25%|██▌ | 15539/61904 [8:05:56<18:07:40, 1.41s/it] 25%|██▌ | 15540/61904 [8:05:57<17:59:27, 1.40s/it] {'loss': 2.6781, 'learning_rate': 1.7513937508103202e-07, 'epoch': 4.02} + 25%|██▌ | 15540/61904 [8:05:57<17:59:27, 1.40s/it] 25%|██▌ | 15541/61904 [8:05:59<17:50:33, 1.39s/it] 25%|██▌ | 15542/61904 [8:06:00<18:40:53, 1.45s/it] 25%|██▌ | 15543/61904 [8:06:02<18:41:08, 1.45s/it] 25%|██▌ | 15544/61904 [8:06:03<18:18:21, 1.42s/it] 25%|██▌ | 15545/61904 [8:06:04<18:13:19, 1.42s/it] 25%|██▌ | 15546/61904 [8:06:06<17:42:12, 1.37s/it] 25%|██▌ | 15547/61904 [8:06:07<18:12:33, 1.41s/it] 25%|██▌ | 15548/61904 [8:06:09<18:03:49, 1.40s/it] 25%|██▌ | 15549/61904 [8:06:10<17:54:45, 1.39s/it] 25%|██▌ | 15550/61904 [8:06:11<18:17:23, 1.42s/it] 25%|██▌ | 15551/61904 [8:06:13<18:29:32, 1.44s/it] 25%|██▌ | 15552/61904 [8:06:14<18:28:02, 1.43s/it] 25%|██▌ | 15553/61904 [8:06:16<18:36:15, 1.44s/it] 25%|██▌ | 15554/61904 [8:06:17<18:25:29, 1.43s/it] 25%|██▌ | 15555/61904 [8:06:19<18:16:59, 1.42s/it] 25%|██▌ | 15556/61904 [8:06:20<17:48:46, 1.38s/it] 25%|██▌ | 15557/61904 [8:06:21<18:12:09, 1.41s/it] 25%|██▌ | 15558/61904 [8:06:23<18:28:44, 1.44s/it] 25%|██▌ | 15559/61904 [8:06:24<18:34:48, 1.44s/it] 25%|██▌ | 15560/61904 [8:06:26<18:01:36, 1.40s/it] {'loss': 2.7411, 'learning_rate': 1.751069622714897e-07, 'epoch': 4.02} + 25%|██▌ | 15560/61904 [8:06:26<18:01:36, 1.40s/it] 25%|██▌ | 15561/61904 [8:06:27<18:16:51, 1.42s/it] 25%|██▌ | 15562/61904 [8:06:28<18:07:42, 1.41s/it] 25%|██▌ | 15563/61904 [8:06:30<18:03:02, 1.40s/it] 25%|██▌ | 15564/61904 [8:06:31<18:37:48, 1.45s/it] 25%|██▌ | 15565/61904 [8:06:33<18:27:02, 1.43s/it] 25%|██▌ | 15566/61904 [8:06:34<18:17:25, 1.42s/it] 25%|██▌ | 15567/61904 [8:06:36<18:58:36, 1.47s/it] 25%|██▌ | 15568/61904 [8:06:37<18:21:52, 1.43s/it] 25%|██▌ | 15569/61904 [8:06:39<18:28:40, 1.44s/it] 25%|██▌ | 15570/61904 [8:06:40<18:12:58, 1.42s/it] 25%|██▌ | 15571/61904 [8:06:41<18:09:38, 1.41s/it] 25%|██▌ | 15572/61904 [8:06:43<18:39:34, 1.45s/it] 25%|██▌ | 15573/61904 [8:06:44<17:59:25, 1.40s/it] 25%|██▌ | 15574/61904 [8:06:46<18:09:24, 1.41s/it] 25%|██▌ | 15575/61904 [8:06:47<18:01:10, 1.40s/it] 25%|██▌ | 15576/61904 [8:06:48<18:11:38, 1.41s/it] 25%|██▌ | 15577/61904 [8:06:50<18:18:32, 1.42s/it] 25%|██▌ | 15578/61904 [8:06:51<18:07:16, 1.41s/it] 25%|██▌ | 15579/61904 [8:06:53<19:07:26, 1.49s/it] 25%|██▌ | 15580/61904 [8:06:54<19:02:25, 1.48s/it] {'loss': 2.7761, 'learning_rate': 1.7507454946194734e-07, 'epoch': 4.03} + 25%|██▌ | 15580/61904 [8:06:54<19:02:25, 1.48s/it] 25%|██▌ | 15581/61904 [8:06:56<19:07:13, 1.49s/it] 25%|██▌ | 15582/61904 [8:06:57<19:11:27, 1.49s/it] 25%|██▌ | 15583/61904 [8:06:59<19:16:40, 1.50s/it] 25%|██▌ | 15584/61904 [8:07:00<18:58:17, 1.47s/it] 25%|██▌ | 15585/61904 [8:07:02<19:01:56, 1.48s/it] 25%|██▌ | 15586/61904 [8:07:03<18:34:05, 1.44s/it] 25%|██▌ | 15587/61904 [8:07:05<18:23:03, 1.43s/it] 25%|██▌ | 15588/61904 [8:07:06<17:55:22, 1.39s/it] 25%|██▌ | 15589/61904 [8:07:07<18:07:12, 1.41s/it] 25%|██▌ | 15590/61904 [8:07:09<17:41:04, 1.37s/it] 25%|██▌ | 15591/61904 [8:07:10<17:43:19, 1.38s/it] 25%|██▌ | 15592/61904 [8:07:11<17:07:46, 1.33s/it] 25%|██▌ | 15593/61904 [8:07:13<17:36:13, 1.37s/it] 25%|██▌ | 15594/61904 [8:07:14<17:19:16, 1.35s/it] 25%|██▌ | 15595/61904 [8:07:15<17:10:25, 1.34s/it] 25%|██▌ | 15596/61904 [8:07:17<17:40:51, 1.37s/it] 25%|██▌ | 15597/61904 [8:07:18<17:20:32, 1.35s/it] 25%|██▌ | 15598/61904 [8:07:19<17:35:26, 1.37s/it] 25%|██▌ | 15599/61904 [8:07:21<17:52:54, 1.39s/it] 25%|██▌ | 15600/61904 [8:07:22<17:05:57, 1.33s/it] {'loss': 2.7267, 'learning_rate': 1.7504213665240503e-07, 'epoch': 4.03} + 25%|██▌ | 15600/61904 [8:07:22<17:05:57, 1.33s/it] 25%|██▌ | 15601/61904 [8:07:23<17:12:36, 1.34s/it] 25%|██▌ | 15602/61904 [8:07:25<17:21:52, 1.35s/it] 25%|██▌ | 15603/61904 [8:07:26<17:03:30, 1.33s/it] 25%|██▌ | 15604/61904 [8:07:27<17:27:19, 1.36s/it] 25%|██▌ | 15605/61904 [8:07:29<17:46:19, 1.38s/it] 25%|██▌ | 15606/61904 [8:07:30<17:18:14, 1.35s/it] 25%|██▌ | 15607/61904 [8:07:32<17:11:50, 1.34s/it] 25%|██▌ | 15608/61904 [8:07:33<17:54:12, 1.39s/it] 25%|██▌ | 15609/61904 [8:07:34<17:31:46, 1.36s/it] 25%|██▌ | 15610/61904 [8:07:36<17:45:48, 1.38s/it] 25%|██▌ | 15611/61904 [8:07:37<17:51:32, 1.39s/it] 25%|██▌ | 15612/61904 [8:07:38<17:35:57, 1.37s/it] 25%|██▌ | 15613/61904 [8:07:40<17:59:41, 1.40s/it] 25%|██▌ | 15614/61904 [8:07:41<17:44:59, 1.38s/it] 25%|██▌ | 15615/61904 [8:07:43<17:39:03, 1.37s/it] 25%|██▌ | 15616/61904 [8:07:44<17:44:36, 1.38s/it] 25%|██▌ | 15617/61904 [8:07:45<17:36:22, 1.37s/it] 25%|██▌ | 15618/61904 [8:07:47<17:34:29, 1.37s/it] 25%|██▌ | 15619/61904 [8:07:48<17:24:08, 1.35s/it] 25%|██▌ | 15620/61904 [8:07:49<17:10:02, 1.34s/it] {'loss': 2.663, 'learning_rate': 1.750097238428627e-07, 'epoch': 4.04} + 25%|██▌ | 15620/61904 [8:07:49<17:10:02, 1.34s/it] 25%|██▌ | 15621/61904 [8:07:51<17:14:49, 1.34s/it] 25%|██▌ | 15622/61904 [8:07:52<17:08:39, 1.33s/it] 25%|██▌ | 15623/61904 [8:07:53<17:09:25, 1.33s/it] 25%|██▌ | 15624/61904 [8:07:55<17:18:24, 1.35s/it] 25%|██▌ | 15625/61904 [8:07:56<17:16:12, 1.34s/it] 25%|██▌ | 15626/61904 [8:07:58<17:37:34, 1.37s/it] 25%|██▌ | 15627/61904 [8:07:59<17:22:34, 1.35s/it] 25%|██▌ | 15628/61904 [8:08:00<17:29:28, 1.36s/it] 25%|██▌ | 15629/61904 [8:08:02<17:18:10, 1.35s/it] 25%|██▌ | 15630/61904 [8:08:03<17:47:55, 1.38s/it] 25%|██▌ | 15631/61904 [8:08:04<17:50:47, 1.39s/it] 25%|██▌ | 15632/61904 [8:08:06<17:58:24, 1.40s/it] 25%|██▌ | 15633/61904 [8:08:07<17:39:08, 1.37s/it] 25%|██▌ | 15634/61904 [8:08:09<17:57:59, 1.40s/it] 25%|██▌ | 15635/61904 [8:08:10<17:44:19, 1.38s/it] 25%|██▌ | 15636/61904 [8:08:11<17:04:04, 1.33s/it] 25%|██▌ | 15637/61904 [8:08:12<17:12:50, 1.34s/it] 25%|██▌ | 15638/61904 [8:08:14<17:08:15, 1.33s/it] 25%|██▌ | 15639/61904 [8:08:16<18:50:40, 1.47s/it] 25%|██▌ | 15640/61904 [8:08:17<19:01:14, 1.48s/it] {'loss': 2.7146, 'learning_rate': 1.7497731103332035e-07, 'epoch': 4.04} + 25%|██▌ | 15640/61904 [8:08:17<19:01:14, 1.48s/it] 25%|██▌ | 15641/61904 [8:08:18<18:31:33, 1.44s/it] 25%|██▌ | 15642/61904 [8:08:20<18:25:54, 1.43s/it] 25%|██▌ | 15643/61904 [8:08:21<18:11:13, 1.42s/it] 25%|██▌ | 15644/61904 [8:08:22<17:31:45, 1.36s/it] 25%|██▌ | 15645/61904 [8:08:24<17:04:38, 1.33s/it] 25%|██▌ | 15646/61904 [8:08:25<17:40:13, 1.38s/it] 25%|██▌ | 15647/61904 [8:08:27<18:14:10, 1.42s/it] 25%|██▌ | 15648/61904 [8:08:28<18:15:19, 1.42s/it] 25%|██▌ | 15649/61904 [8:08:30<17:58:45, 1.40s/it] 25%|██▌ | 15650/61904 [8:08:31<17:56:50, 1.40s/it] 25%|██▌ | 15651/61904 [8:08:32<18:23:26, 1.43s/it] 25%|██▌ | 15652/61904 [8:08:34<17:47:57, 1.39s/it] 25%|██▌ | 15653/61904 [8:08:35<17:38:24, 1.37s/it] 25%|██▌ | 15654/61904 [8:08:36<17:52:12, 1.39s/it] 25%|██▌ | 15655/61904 [8:08:38<17:40:03, 1.38s/it] 25%|██▌ | 15656/61904 [8:08:39<17:31:37, 1.36s/it] 25%|██▌ | 15657/61904 [8:08:41<18:05:54, 1.41s/it] 25%|██▌ | 15658/61904 [8:08:42<17:36:44, 1.37s/it] 25%|██▌ | 15659/61904 [8:08:43<17:09:44, 1.34s/it] 25%|██▌ | 15660/61904 [8:08:44<17:00:31, 1.32s/it] {'loss': 2.7308, 'learning_rate': 1.7494489822377804e-07, 'epoch': 4.05} + 25%|██▌ | 15660/61904 [8:08:44<17:00:31, 1.32s/it] 25%|██▌ | 15661/61904 [8:08:46<16:34:35, 1.29s/it] 25%|██▌ | 15662/61904 [8:08:47<16:44:17, 1.30s/it] 25%|██▌ | 15663/61904 [8:08:48<17:05:34, 1.33s/it] 25%|██▌ | 15664/61904 [8:08:50<17:10:29, 1.34s/it] 25%|██▌ | 15665/61904 [8:08:51<17:09:39, 1.34s/it] 25%|██▌ | 15666/61904 [8:08:52<17:06:31, 1.33s/it] 25%|██▌ | 15667/61904 [8:08:54<17:07:22, 1.33s/it] 25%|██▌ | 15668/61904 [8:08:55<16:44:14, 1.30s/it] 25%|██▌ | 15669/61904 [8:08:56<16:49:45, 1.31s/it] 25%|██▌ | 15670/61904 [8:08:58<16:59:50, 1.32s/it] 25%|██▌ | 15671/61904 [8:08:59<16:41:05, 1.30s/it] 25%|██▌ | 15672/61904 [8:09:00<16:46:43, 1.31s/it] 25%|██▌ | 15673/61904 [8:09:02<16:47:49, 1.31s/it] 25%|██▌ | 15674/61904 [8:09:03<16:45:21, 1.30s/it] 25%|██▌ | 15675/61904 [8:09:04<17:33:30, 1.37s/it] 25%|██▌ | 15676/61904 [8:09:06<17:32:02, 1.37s/it] 25%|██▌ | 15677/61904 [8:09:07<17:56:23, 1.40s/it] 25%|██▌ | 15678/61904 [8:09:09<18:03:53, 1.41s/it] 25%|██▌ | 15679/61904 [8:09:10<17:32:57, 1.37s/it] 25%|██▌ | 15680/61904 [8:09:11<17:51:27, 1.39s/it] {'loss': 2.7472, 'learning_rate': 1.749124854142357e-07, 'epoch': 4.05} + 25%|██▌ | 15680/61904 [8:09:11<17:51:27, 1.39s/it] 25%|██▌ | 15681/61904 [8:09:13<18:08:57, 1.41s/it] 25%|██▌ | 15682/61904 [8:09:14<17:47:35, 1.39s/it] 25%|██▌ | 15683/61904 [8:09:15<17:33:49, 1.37s/it] 25%|██▌ | 15684/61904 [8:09:17<18:18:08, 1.43s/it] 25%|██▌ | 15685/61904 [8:09:18<18:01:28, 1.40s/it] 25%|██▌ | 15686/61904 [8:09:20<17:45:41, 1.38s/it] 25%|██▌ | 15687/61904 [8:09:21<17:33:14, 1.37s/it] 25%|██▌ | 15688/61904 [8:09:22<17:32:20, 1.37s/it] 25%|██▌ | 15689/61904 [8:09:24<17:49:28, 1.39s/it] 25%|██▌ | 15690/61904 [8:09:25<17:41:46, 1.38s/it] 25%|██▌ | 15691/61904 [8:09:27<18:04:16, 1.41s/it] 25%|██▌ | 15692/61904 [8:09:28<18:18:38, 1.43s/it] 25%|██▌ | 15693/61904 [8:09:29<17:55:34, 1.40s/it] 25%|██▌ | 15694/61904 [8:09:31<18:58:22, 1.48s/it] 25%|██▌ | 15695/61904 [8:09:33<18:29:35, 1.44s/it] 25%|██▌ | 15696/61904 [8:09:34<18:21:57, 1.43s/it] 25%|██▌ | 15697/61904 [8:09:35<18:11:00, 1.42s/it] 25%|██▌ | 15698/61904 [8:09:37<18:14:57, 1.42s/it] 25%|██▌ | 15699/61904 [8:09:38<18:02:00, 1.41s/it] 25%|██▌ | 15700/61904 [8:09:40<18:01:17, 1.40s/it] {'loss': 2.7101, 'learning_rate': 1.7488007260469336e-07, 'epoch': 4.06} + 25%|██▌ | 15700/61904 [8:09:40<18:01:17, 1.40s/it] 25%|██▌ | 15701/61904 [8:09:41<17:53:15, 1.39s/it] 25%|██▌ | 15702/61904 [8:09:42<17:43:49, 1.38s/it] 25%|██▌ | 15703/61904 [8:09:44<17:38:36, 1.37s/it] 25%|██▌ | 15704/61904 [8:09:45<16:54:07, 1.32s/it] 25%|██▌ | 15705/61904 [8:09:46<17:08:10, 1.34s/it] 25%|██▌ | 15706/61904 [8:09:48<17:24:32, 1.36s/it] 25%|██▌ | 15707/61904 [8:09:49<17:41:56, 1.38s/it] 25%|██▌ | 15708/61904 [8:09:50<17:46:42, 1.39s/it] 25%|██▌ | 15709/61904 [8:09:52<17:30:57, 1.37s/it] 25%|██▌ | 15710/61904 [8:09:53<17:27:01, 1.36s/it] 25%|██▌ | 15711/61904 [8:09:54<17:34:19, 1.37s/it] 25%|██▌ | 15712/61904 [8:09:56<17:45:41, 1.38s/it] 25%|██▌ | 15713/61904 [8:09:57<17:17:03, 1.35s/it] 25%|██▌ | 15714/61904 [8:09:58<17:02:03, 1.33s/it] 25%|██▌ | 15715/61904 [8:10:00<16:29:01, 1.28s/it] 25%|██▌ | 15716/61904 [8:10:01<16:31:18, 1.29s/it] 25%|██▌ | 15717/61904 [8:10:02<16:34:17, 1.29s/it] 25%|██▌ | 15718/61904 [8:10:03<16:32:02, 1.29s/it] 25%|██▌ | 15719/61904 [8:10:05<17:26:32, 1.36s/it] 25%|██▌ | 15720/61904 [8:10:07<18:03:33, 1.41s/it] {'loss': 2.7391, 'learning_rate': 1.7484765979515105e-07, 'epoch': 4.06} + 25%|██▌ | 15720/61904 [8:10:07<18:03:33, 1.41s/it] 25%|██▌ | 15721/61904 [8:10:08<18:15:41, 1.42s/it] 25%|██▌ | 15722/61904 [8:10:09<18:01:16, 1.40s/it] 25%|██▌ | 15723/61904 [8:10:11<17:53:51, 1.40s/it] 25%|██▌ | 15724/61904 [8:10:12<17:45:03, 1.38s/it] 25%|██▌ | 15725/61904 [8:10:13<17:33:41, 1.37s/it] 25%|██▌ | 15726/61904 [8:10:15<18:02:03, 1.41s/it] 25%|██▌ | 15727/61904 [8:10:16<17:50:32, 1.39s/it] 25%|██▌ | 15728/61904 [8:10:18<18:13:28, 1.42s/it] 25%|██▌ | 15729/61904 [8:10:19<18:23:45, 1.43s/it] 25%|██▌ | 15730/61904 [8:10:21<19:00:14, 1.48s/it] 25%|██▌ | 15731/61904 [8:10:22<18:11:56, 1.42s/it] 25%|██▌ | 15732/61904 [8:10:23<17:48:46, 1.39s/it] 25%|██▌ | 15733/61904 [8:10:25<17:39:39, 1.38s/it] 25%|██▌ | 15734/61904 [8:10:26<17:34:12, 1.37s/it] 25%|██▌ | 15735/61904 [8:10:27<17:14:25, 1.34s/it] 25%|██▌ | 15736/61904 [8:10:29<17:27:17, 1.36s/it] 25%|██▌ | 15737/61904 [8:10:30<17:19:18, 1.35s/it] 25%|██▌ | 15738/61904 [8:10:31<17:30:40, 1.37s/it] 25%|██▌ | 15739/61904 [8:10:33<17:48:48, 1.39s/it] 25%|██▌ | 15740/61904 [8:10:34<17:38:26, 1.38s/it] {'loss': 2.6709, 'learning_rate': 1.748152469856087e-07, 'epoch': 4.07} + 25%|██▌ | 15740/61904 [8:10:34<17:38:26, 1.38s/it] 25%|██▌ | 15741/61904 [8:10:36<18:13:17, 1.42s/it] 25%|██▌ | 15742/61904 [8:10:37<18:18:27, 1.43s/it] 25%|██▌ | 15743/61904 [8:10:39<17:48:34, 1.39s/it] 25%|██▌ | 15744/61904 [8:10:40<17:25:46, 1.36s/it] 25%|██▌ | 15745/61904 [8:10:41<17:32:47, 1.37s/it] 25%|██▌ | 15746/61904 [8:10:42<16:49:13, 1.31s/it] 25%|██▌ | 15747/61904 [8:10:44<17:20:27, 1.35s/it] 25%|██▌ | 15748/61904 [8:10:45<17:01:13, 1.33s/it] 25%|██▌ | 15749/61904 [8:10:46<17:03:32, 1.33s/it] 25%|██▌ | 15750/61904 [8:10:48<17:32:04, 1.37s/it] 25%|██▌ | 15751/61904 [8:10:49<17:08:38, 1.34s/it] 25%|██▌ | 15752/61904 [8:10:51<17:50:01, 1.39s/it] 25%|██▌ | 15753/61904 [8:10:52<17:39:10, 1.38s/it] 25%|██▌ | 15754/61904 [8:10:53<17:40:36, 1.38s/it] 25%|██▌ | 15755/61904 [8:10:55<17:15:45, 1.35s/it] 25%|██▌ | 15756/61904 [8:10:56<17:22:08, 1.35s/it] 25%|██▌ | 15757/61904 [8:10:58<17:50:06, 1.39s/it] 25%|██▌ | 15758/61904 [8:10:59<17:21:17, 1.35s/it] 25%|██▌ | 15759/61904 [8:11:00<17:17:15, 1.35s/it] 25%|██▌ | 15760/61904 [8:11:01<17:14:30, 1.35s/it] {'loss': 2.6802, 'learning_rate': 1.7478283417606638e-07, 'epoch': 4.07} + 25%|██▌ | 15760/61904 [8:11:01<17:14:30, 1.35s/it] 25%|██▌ | 15761/61904 [8:11:03<17:48:40, 1.39s/it] 25%|██▌ | 15762/61904 [8:11:04<17:39:16, 1.38s/it] 25%|██▌ | 15763/61904 [8:11:06<17:23:38, 1.36s/it] 25%|██▌ | 15764/61904 [8:11:07<17:22:51, 1.36s/it] 25%|██▌ | 15765/61904 [8:11:08<16:44:06, 1.31s/it] 25%|██▌ | 15766/61904 [8:11:10<17:14:09, 1.34s/it] 25%|██▌ | 15767/61904 [8:11:11<17:02:03, 1.33s/it] 25%|██▌ | 15768/61904 [8:11:12<17:09:52, 1.34s/it] 25%|██▌ | 15769/61904 [8:11:14<17:14:39, 1.35s/it] 25%|██▌ | 15770/61904 [8:11:15<17:05:17, 1.33s/it] 25%|██▌ | 15771/61904 [8:11:16<16:49:26, 1.31s/it] 25%|██▌ | 15772/61904 [8:11:18<16:48:45, 1.31s/it] 25%|██▌ | 15773/61904 [8:11:19<16:55:36, 1.32s/it] 25%|██▌ | 15774/61904 [8:11:20<16:55:16, 1.32s/it] 25%|██▌ | 15775/61904 [8:11:22<16:55:52, 1.32s/it] 25%|██▌ | 15776/61904 [8:11:23<16:56:02, 1.32s/it] 25%|██▌ | 15777/61904 [8:11:24<16:45:27, 1.31s/it] 25%|██▌ | 15778/61904 [8:11:25<16:50:16, 1.31s/it] 25%|██▌ | 15779/61904 [8:11:27<17:03:48, 1.33s/it] 25%|██▌ | 15780/61904 [8:11:28<16:57:41, 1.32s/it] {'loss': 2.6961, 'learning_rate': 1.7475042136652407e-07, 'epoch': 4.08} + 25%|██▌ | 15780/61904 [8:11:28<16:57:41, 1.32s/it] 25%|██▌ | 15781/61904 [8:11:29<17:03:45, 1.33s/it] 25%|██▌ | 15782/61904 [8:11:31<17:06:13, 1.34s/it] 25%|██▌ | 15783/61904 [8:11:32<17:10:16, 1.34s/it] 25%|██▌ | 15784/61904 [8:11:34<17:20:08, 1.35s/it] 25%|██▌ | 15785/61904 [8:11:35<16:54:25, 1.32s/it] 26%|██▌ | 15786/61904 [8:11:36<17:12:45, 1.34s/it] 26%|██▌ | 15787/61904 [8:11:38<17:11:42, 1.34s/it] 26%|██▌ | 15788/61904 [8:11:39<17:52:09, 1.39s/it] 26%|██▌ | 15789/61904 [8:11:40<17:44:51, 1.39s/it] 26%|██▌ | 15790/61904 [8:11:42<17:41:50, 1.38s/it] 26%|██▌ | 15791/61904 [8:11:43<17:26:02, 1.36s/it] 26%|██▌ | 15792/61904 [8:11:44<17:20:22, 1.35s/it] 26%|██▌ | 15793/61904 [8:11:46<18:01:02, 1.41s/it] 26%|██▌ | 15794/61904 [8:11:47<17:28:23, 1.36s/it] 26%|██▌ | 15795/61904 [8:11:49<17:38:56, 1.38s/it] 26%|██▌ | 15796/61904 [8:11:50<17:21:12, 1.35s/it] 26%|██▌ | 15797/61904 [8:11:51<16:40:43, 1.30s/it] 26%|██▌ | 15798/61904 [8:11:52<16:57:04, 1.32s/it] 26%|██▌ | 15799/61904 [8:11:54<17:02:25, 1.33s/it] 26%|██▌ | 15800/61904 [8:11:55<17:12:04, 1.34s/it] {'loss': 2.7584, 'learning_rate': 1.747180085569817e-07, 'epoch': 4.08} + 26%|██▌ | 15800/61904 [8:11:55<17:12:04, 1.34s/it] 26%|██▌ | 15801/61904 [8:11:57<18:00:21, 1.41s/it] 26%|██▌ | 15802/61904 [8:11:58<17:48:53, 1.39s/it] 26%|██▌ | 15803/61904 [8:11:59<17:16:44, 1.35s/it] 26%|██▌ | 15804/61904 [8:12:01<17:26:13, 1.36s/it] 26%|██▌ | 15805/61904 [8:12:02<17:13:04, 1.34s/it] 26%|██▌ | 15806/61904 [8:12:03<17:19:16, 1.35s/it] 26%|██▌ | 15807/61904 [8:12:05<17:03:01, 1.33s/it] 26%|██▌ | 15808/61904 [8:12:06<17:26:00, 1.36s/it] 26%|██▌ | 15809/61904 [8:12:07<17:10:57, 1.34s/it] 26%|██▌ | 15810/61904 [8:12:09<18:17:52, 1.43s/it] 26%|██▌ | 15811/61904 [8:12:10<18:07:05, 1.42s/it] 26%|██▌ | 15812/61904 [8:12:12<17:20:50, 1.35s/it] 26%|██▌ | 15813/61904 [8:12:13<17:49:35, 1.39s/it] 26%|██▌ | 15814/61904 [8:12:14<17:32:58, 1.37s/it] 26%|██▌ | 15815/61904 [8:12:16<17:28:43, 1.37s/it] 26%|██▌ | 15816/61904 [8:12:17<17:22:54, 1.36s/it] 26%|██▌ | 15817/61904 [8:12:18<17:08:33, 1.34s/it] 26%|██▌ | 15818/61904 [8:12:20<16:57:40, 1.32s/it] 26%|██▌ | 15819/61904 [8:12:21<16:45:31, 1.31s/it] 26%|██▌ | 15820/61904 [8:12:22<16:50:57, 1.32s/it] {'loss': 2.7462, 'learning_rate': 1.746855957474394e-07, 'epoch': 4.09} + 26%|██▌ | 15820/61904 [8:12:22<16:50:57, 1.32s/it] 26%|██▌ | 15821/61904 [8:12:24<17:06:13, 1.34s/it] 26%|██▌ | 15822/61904 [8:12:25<17:21:21, 1.36s/it] 26%|██▌ | 15823/61904 [8:12:26<17:08:37, 1.34s/it] 26%|██▌ | 15824/61904 [8:12:28<17:31:04, 1.37s/it] 26%|██▌ | 15825/61904 [8:12:29<17:00:12, 1.33s/it] 26%|██▌ | 15826/61904 [8:12:31<17:16:21, 1.35s/it] 26%|██▌ | 15827/61904 [8:12:32<17:41:16, 1.38s/it] 26%|██▌ | 15828/61904 [8:12:33<17:43:04, 1.38s/it] 26%|██▌ | 15829/61904 [8:12:35<17:50:26, 1.39s/it] 26%|██▌ | 15830/61904 [8:12:36<18:22:55, 1.44s/it] 26%|██▌ | 15831/61904 [8:12:38<17:35:58, 1.38s/it] 26%|██▌ | 15832/61904 [8:12:39<17:05:43, 1.34s/it] 26%|██▌ | 15833/61904 [8:12:40<16:53:27, 1.32s/it] 26%|██▌ | 15834/61904 [8:12:41<16:56:58, 1.32s/it] 26%|██▌ | 15835/61904 [8:12:43<16:44:55, 1.31s/it] 26%|██▌ | 15836/61904 [8:12:44<17:26:34, 1.36s/it] 26%|██▌ | 15837/61904 [8:12:46<17:39:16, 1.38s/it] 26%|██▌ | 15838/61904 [8:12:47<17:24:08, 1.36s/it] 26%|██▌ | 15839/61904 [8:12:48<17:14:55, 1.35s/it] 26%|██▌ | 15840/61904 [8:12:50<17:52:41, 1.40s/it] {'loss': 2.7116, 'learning_rate': 1.7465318293789705e-07, 'epoch': 4.09} + 26%|██▌ | 15840/61904 [8:12:50<17:52:41, 1.40s/it] 26%|██▌ | 15841/61904 [8:12:51<17:56:58, 1.40s/it] 26%|██▌ | 15842/61904 [8:12:52<17:25:12, 1.36s/it] 26%|██▌ | 15843/61904 [8:12:54<17:09:42, 1.34s/it] 26%|██▌ | 15844/61904 [8:12:55<17:16:59, 1.35s/it] 26%|██▌ | 15845/61904 [8:12:56<17:20:28, 1.36s/it] 26%|██▌ | 15846/61904 [8:12:58<17:07:35, 1.34s/it] 26%|██▌ | 15847/61904 [8:12:59<17:14:08, 1.35s/it] 26%|██▌ | 15848/61904 [8:13:01<17:29:04, 1.37s/it] 26%|██▌ | 15849/61904 [8:13:02<18:38:25, 1.46s/it] 26%|██▌ | 15850/61904 [8:13:04<18:56:49, 1.48s/it] 26%|██▌ | 15851/61904 [8:13:05<18:07:14, 1.42s/it] 26%|██▌ | 15852/61904 [8:13:06<17:51:55, 1.40s/it] 26%|██▌ | 15853/61904 [8:13:08<17:42:13, 1.38s/it] 26%|██▌ | 15854/61904 [8:13:09<18:20:27, 1.43s/it] 26%|██▌ | 15855/61904 [8:13:11<17:49:28, 1.39s/it] 26%|██▌ | 15856/61904 [8:13:12<17:37:53, 1.38s/it] 26%|██▌ | 15857/61904 [8:13:13<17:31:26, 1.37s/it] 26%|██▌ | 15858/61904 [8:13:15<17:28:39, 1.37s/it] 26%|██▌ | 15859/61904 [8:13:16<17:39:00, 1.38s/it] 26%|██▌ | 15860/61904 [8:13:17<17:13:21, 1.35s/it] {'loss': 2.7221, 'learning_rate': 1.746207701283547e-07, 'epoch': 4.1} + 26%|██▌ | 15860/61904 [8:13:17<17:13:21, 1.35s/it] 26%|██▌ | 15861/61904 [8:13:19<18:19:40, 1.43s/it] 26%|██▌ | 15862/61904 [8:13:20<18:05:37, 1.41s/it] 26%|██▌ | 15863/61904 [8:13:22<18:53:43, 1.48s/it] 26%|██▌ | 15864/61904 [8:13:23<18:21:37, 1.44s/it] 26%|██▌ | 15865/61904 [8:13:25<18:41:10, 1.46s/it] 26%|██▌ | 15866/61904 [8:13:26<18:14:25, 1.43s/it] 26%|██▌ | 15867/61904 [8:13:27<18:05:22, 1.41s/it] 26%|██▌ | 15868/61904 [8:13:29<17:23:57, 1.36s/it] 26%|██▌ | 15869/61904 [8:13:30<16:47:27, 1.31s/it] 26%|██▌ | 15870/61904 [8:13:31<16:42:23, 1.31s/it] 26%|██▌ | 15871/61904 [8:13:32<16:30:57, 1.29s/it] 26%|██▌ | 15872/61904 [8:13:34<16:54:04, 1.32s/it] 26%|██▌ | 15873/61904 [8:13:35<17:39:22, 1.38s/it] 26%|██▌ | 15874/61904 [8:13:37<17:41:12, 1.38s/it] 26%|██▌ | 15875/61904 [8:13:38<17:24:31, 1.36s/it] 26%|██▌ | 15876/61904 [8:13:39<17:33:49, 1.37s/it] 26%|██▌ | 15877/61904 [8:13:41<17:53:29, 1.40s/it] 26%|██▌ | 15878/61904 [8:13:42<17:26:48, 1.36s/it] 26%|██▌ | 15879/61904 [8:13:44<17:16:08, 1.35s/it] 26%|██▌ | 15880/61904 [8:13:45<17:13:09, 1.35s/it] {'loss': 2.7217, 'learning_rate': 1.745883573188124e-07, 'epoch': 4.1} + 26%|██▌ | 15880/61904 [8:13:45<17:13:09, 1.35s/it] 26%|██▌ | 15881/61904 [8:13:46<17:24:49, 1.36s/it] 26%|██▌ | 15882/61904 [8:13:48<16:55:43, 1.32s/it] 26%|██▌ | 15883/61904 [8:13:49<17:13:10, 1.35s/it] 26%|██▌ | 15884/61904 [8:13:50<17:59:24, 1.41s/it] 26%|██▌ | 15885/61904 [8:13:52<18:07:43, 1.42s/it] 26%|██▌ | 15886/61904 [8:13:53<17:49:52, 1.39s/it] 26%|██▌ | 15887/61904 [8:13:55<17:51:17, 1.40s/it] 26%|██▌ | 15888/61904 [8:13:56<17:28:41, 1.37s/it] 26%|██▌ | 15889/61904 [8:13:57<17:05:29, 1.34s/it] 26%|██▌ | 15890/61904 [8:13:59<17:13:33, 1.35s/it] 26%|██▌ | 15891/61904 [8:14:00<17:16:04, 1.35s/it] 26%|██▌ | 15892/61904 [8:14:01<17:04:13, 1.34s/it] 26%|██▌ | 15893/61904 [8:14:02<16:39:40, 1.30s/it] 26%|██▌ | 15894/61904 [8:14:04<16:26:04, 1.29s/it] 26%|██▌ | 15895/61904 [8:14:05<17:34:26, 1.38s/it] 26%|██▌ | 15896/61904 [8:14:07<17:48:38, 1.39s/it] 26%|██▌ | 15897/61904 [8:14:08<18:03:54, 1.41s/it] 26%|██▌ | 15898/61904 [8:14:09<17:24:05, 1.36s/it] 26%|██▌ | 15899/61904 [8:14:11<17:02:05, 1.33s/it] 26%|██▌ | 15900/61904 [8:14:12<17:49:49, 1.40s/it] {'loss': 2.6742, 'learning_rate': 1.7455594450927004e-07, 'epoch': 4.11} + 26%|██▌ | 15900/61904 [8:14:12<17:49:49, 1.40s/it] 26%|██▌ | 15901/61904 [8:14:14<17:56:24, 1.40s/it] 26%|██▌ | 15902/61904 [8:14:15<17:43:02, 1.39s/it] 26%|██▌ | 15903/61904 [8:14:17<18:41:24, 1.46s/it] 26%|██▌ | 15904/61904 [8:14:18<18:01:53, 1.41s/it] 26%|██▌ | 15905/61904 [8:14:19<18:00:04, 1.41s/it] 26%|██▌ | 15906/61904 [8:14:21<17:47:14, 1.39s/it] 26%|██▌ | 15907/61904 [8:14:22<17:19:31, 1.36s/it] 26%|██▌ | 15908/61904 [8:14:23<17:10:01, 1.34s/it] 26%|██▌ | 15909/61904 [8:14:25<17:44:32, 1.39s/it] 26%|██▌ | 15910/61904 [8:14:26<17:53:41, 1.40s/it] 26%|██▌ | 15911/61904 [8:14:28<17:49:39, 1.40s/it] 26%|██▌ | 15912/61904 [8:14:29<17:45:56, 1.39s/it] 26%|██▌ | 15913/61904 [8:14:30<17:30:36, 1.37s/it] 26%|██▌ | 15914/61904 [8:14:32<17:02:29, 1.33s/it] 26%|██▌ | 15915/61904 [8:14:33<16:48:48, 1.32s/it] 26%|██▌ | 15916/61904 [8:14:34<17:07:25, 1.34s/it] 26%|██▌ | 15917/61904 [8:14:36<17:17:29, 1.35s/it] 26%|██▌ | 15918/61904 [8:14:37<17:25:51, 1.36s/it] 26%|██▌ | 15919/61904 [8:14:38<17:14:56, 1.35s/it] 26%|██▌ | 15920/61904 [8:14:40<17:18:35, 1.36s/it] {'loss': 2.7309, 'learning_rate': 1.7452353169972772e-07, 'epoch': 4.11} + 26%|██▌ | 15920/61904 [8:14:40<17:18:35, 1.36s/it] 26%|██▌ | 15921/61904 [8:14:41<17:32:09, 1.37s/it] 26%|██▌ | 15922/61904 [8:14:42<17:35:27, 1.38s/it] 26%|██▌ | 15923/61904 [8:14:44<17:08:14, 1.34s/it] 26%|██▌ | 15924/61904 [8:14:45<17:21:59, 1.36s/it] 26%|██▌ | 15925/61904 [8:14:46<17:03:19, 1.34s/it] 26%|██▌ | 15926/61904 [8:14:48<17:08:41, 1.34s/it] 26%|██▌ | 15927/61904 [8:14:49<17:27:31, 1.37s/it] 26%|██▌ | 15928/61904 [8:14:50<16:57:18, 1.33s/it] 26%|██▌ | 15929/61904 [8:14:52<16:42:54, 1.31s/it] 26%|██▌ | 15930/61904 [8:14:53<16:59:08, 1.33s/it] 26%|██▌ | 15931/61904 [8:14:55<17:26:37, 1.37s/it] 26%|██▌ | 15932/61904 [8:14:56<18:33:02, 1.45s/it] 26%|██▌ | 15933/61904 [8:14:58<18:09:53, 1.42s/it] 26%|██▌ | 15934/61904 [8:14:59<17:32:46, 1.37s/it] 26%|██▌ | 15935/61904 [8:15:00<17:40:16, 1.38s/it] 26%|██▌ | 15936/61904 [8:15:02<17:26:09, 1.37s/it] 26%|██▌ | 15937/61904 [8:15:03<17:22:17, 1.36s/it] 26%|██▌ | 15938/61904 [8:15:04<16:38:31, 1.30s/it] 26%|██�� | 15939/61904 [8:15:05<16:50:58, 1.32s/it] 26%|██▌ | 15940/61904 [8:15:07<17:33:38, 1.38s/it] {'loss': 2.6948, 'learning_rate': 1.744911188901854e-07, 'epoch': 4.12} + 26%|██▌ | 15940/61904 [8:15:07<17:33:38, 1.38s/it] 26%|██▌ | 15941/61904 [8:15:08<17:13:23, 1.35s/it] 26%|██▌ | 15942/61904 [8:15:10<17:23:23, 1.36s/it] 26%|██▌ | 15943/61904 [8:15:11<17:11:19, 1.35s/it] 26%|██▌ | 15944/61904 [8:15:12<17:12:37, 1.35s/it] 26%|██▌ | 15945/61904 [8:15:14<17:07:59, 1.34s/it] 26%|██▌ | 15946/61904 [8:15:15<17:13:11, 1.35s/it] 26%|██▌ | 15947/61904 [8:15:16<17:25:16, 1.36s/it] 26%|██▌ | 15948/61904 [8:15:18<17:40:01, 1.38s/it] 26%|██▌ | 15949/61904 [8:15:19<17:03:12, 1.34s/it] 26%|██▌ | 15950/61904 [8:15:20<16:48:13, 1.32s/it] 26%|██▌ | 15951/61904 [8:15:22<17:04:42, 1.34s/it] 26%|██▌ | 15952/61904 [8:15:23<17:13:24, 1.35s/it] 26%|██▌ | 15953/61904 [8:15:24<17:03:41, 1.34s/it] 26%|██▌ | 15954/61904 [8:15:26<16:54:34, 1.32s/it] 26%|██▌ | 15955/61904 [8:15:27<17:00:20, 1.33s/it] 26%|██▌ | 15956/61904 [8:15:29<17:46:30, 1.39s/it] 26%|██▌ | 15957/61904 [8:15:30<17:23:39, 1.36s/it] 26%|██▌ | 15958/61904 [8:15:31<17:40:50, 1.39s/it] 26%|██▌ | 15959/61904 [8:15:33<18:00:35, 1.41s/it] 26%|██▌ | 15960/61904 [8:15:34<18:37:16, 1.46s/it] {'loss': 2.6965, 'learning_rate': 1.7445870608064305e-07, 'epoch': 4.12} + 26%|██▌ | 15960/61904 [8:15:34<18:37:16, 1.46s/it] 26%|██▌ | 15961/61904 [8:15:36<18:25:44, 1.44s/it] 26%|██▌ | 15962/61904 [8:15:37<18:09:15, 1.42s/it] 26%|██▌ | 15963/61904 [8:15:39<18:19:26, 1.44s/it] 26%|██▌ | 15964/61904 [8:15:40<17:38:01, 1.38s/it] 26%|██▌ | 15965/61904 [8:15:41<17:09:12, 1.34s/it] 26%|██▌ | 15966/61904 [8:15:42<17:24:16, 1.36s/it] 26%|██▌ | 15967/61904 [8:15:44<17:46:07, 1.39s/it] 26%|██▌ | 15968/61904 [8:15:45<17:21:45, 1.36s/it] 26%|██▌ | 15969/61904 [8:15:47<17:13:00, 1.35s/it] 26%|██▌ | 15970/61904 [8:15:48<17:08:38, 1.34s/it] 26%|██▌ | 15971/61904 [8:15:49<16:57:59, 1.33s/it] 26%|██▌ | 15972/61904 [8:15:51<17:05:34, 1.34s/it] 26%|██▌ | 15973/61904 [8:15:52<17:47:01, 1.39s/it] 26%|██▌ | 15974/61904 [8:15:54<18:10:10, 1.42s/it] 26%|██▌ | 15975/61904 [8:15:55<17:42:52, 1.39s/it] 26%|██▌ | 15976/61904 [8:15:56<17:48:49, 1.40s/it] 26%|██▌ | 15977/61904 [8:15:58<17:58:16, 1.41s/it] 26%|██▌ | 15978/61904 [8:15:59<17:41:38, 1.39s/it] 26%|██▌ | 15979/61904 [8:16:00<17:55:40, 1.41s/it] 26%|██▌ | 15980/61904 [8:16:02<17:21:21, 1.36s/it] {'loss': 2.6889, 'learning_rate': 1.7442629327110074e-07, 'epoch': 4.13} + 26%|██▌ | 15980/61904 [8:16:02<17:21:21, 1.36s/it] 26%|██▌ | 15981/61904 [8:16:03<16:54:17, 1.33s/it] 26%|██▌ | 15982/61904 [8:16:04<16:47:37, 1.32s/it] 26%|██▌ | 15983/61904 [8:16:06<17:02:40, 1.34s/it] 26%|██▌ | 15984/61904 [8:16:07<17:14:01, 1.35s/it] 26%|██▌ | 15985/61904 [8:16:08<17:07:35, 1.34s/it] 26%|██▌ | 15986/61904 [8:16:10<16:57:58, 1.33s/it] 26%|██▌ | 15987/61904 [8:16:11<17:14:56, 1.35s/it] 26%|██▌ | 15988/61904 [8:16:12<16:54:34, 1.33s/it] 26%|██▌ | 15989/61904 [8:16:14<16:45:59, 1.31s/it] 26%|██▌ | 15990/61904 [8:16:15<16:50:36, 1.32s/it] 26%|██▌ | 15991/61904 [8:16:16<17:14:20, 1.35s/it] 26%|██▌ | 15992/61904 [8:16:18<17:36:50, 1.38s/it] 26%|██▌ | 15993/61904 [8:16:19<17:20:53, 1.36s/it] 26%|██▌ | 15994/61904 [8:16:21<17:20:55, 1.36s/it] 26%|██▌ | 15995/61904 [8:16:22<17:51:44, 1.40s/it] 26%|██▌ | 15996/61904 [8:16:23<17:28:11, 1.37s/it] 26%|██▌ | 15997/61904 [8:16:25<17:25:36, 1.37s/it] 26%|██▌ | 15998/61904 [8:16:26<17:17:52, 1.36s/it] 26%|██▌ | 15999/61904 [8:16:27<17:25:19, 1.37s/it] 26%|██▌ | 16000/61904 [8:16:29<16:55:57, 1.33s/it] {'loss': 2.7632, 'learning_rate': 1.7439388046155843e-07, 'epoch': 4.13} + 26%|██▌ | 16000/61904 [8:16:29<16:55:57, 1.33s/it] 26%|██▌ | 16001/61904 [8:16:30<16:45:48, 1.31s/it] 26%|██▌ | 16002/61904 [8:16:31<16:28:12, 1.29s/it] 26%|██▌ | 16003/61904 [8:16:32<16:25:03, 1.29s/it] 26%|██▌ | 16004/61904 [8:16:34<16:29:01, 1.29s/it] 26%|██▌ | 16005/61904 [8:16:35<16:46:27, 1.32s/it] 26%|██▌ | 16006/61904 [8:16:36<16:34:36, 1.30s/it] 26%|██▌ | 16007/61904 [8:16:38<17:06:01, 1.34s/it] 26%|██▌ | 16008/61904 [8:16:39<16:55:30, 1.33s/it] 26%|██▌ | 16009/61904 [8:16:41<17:13:39, 1.35s/it] 26%|██▌ | 16010/61904 [8:16:42<16:59:42, 1.33s/it] 26%|██▌ | 16011/61904 [8:16:43<16:57:00, 1.33s/it] 26%|██▌ | 16012/61904 [8:16:44<16:53:20, 1.32s/it] 26%|██▌ | 16013/61904 [8:16:46<16:53:03, 1.32s/it] 26%|██▌ | 16014/61904 [8:16:47<16:44:31, 1.31s/it] 26%|██▌ | 16015/61904 [8:16:48<16:27:04, 1.29s/it] 26%|██▌ | 16016/61904 [8:16:50<16:21:35, 1.28s/it] 26%|██▌ | 16017/61904 [8:16:51<16:22:33, 1.28s/it] 26%|██▌ | 16018/61904 [8:16:52<16:19:23, 1.28s/it] 26%|██▌ | 16019/61904 [8:16:54<16:55:24, 1.33s/it] 26%|██▌ | 16020/61904 [8:16:55<17:38:48, 1.38s/it] {'loss': 2.6957, 'learning_rate': 1.7436146765201606e-07, 'epoch': 4.14} + 26%|██▌ | 16020/61904 [8:16:55<17:38:48, 1.38s/it] 26%|██▌ | 16021/61904 [8:16:56<17:46:22, 1.39s/it] 26%|██▌ | 16022/61904 [8:16:58<17:25:12, 1.37s/it] 26%|██▌ | 16023/61904 [8:16:59<16:51:31, 1.32s/it] 26%|██▌ | 16024/61904 [8:17:01<17:45:10, 1.39s/it] 26%|██▌ | 16025/61904 [8:17:02<18:25:56, 1.45s/it] 26%|██▌ | 16026/61904 [8:17:04<18:14:01, 1.43s/it] 26%|██▌ | 16027/61904 [8:17:05<17:50:10, 1.40s/it] 26%|██▌ | 16028/61904 [8:17:06<17:23:52, 1.37s/it] 26%|██▌ | 16029/61904 [8:17:08<17:34:10, 1.38s/it] 26%|██▌ | 16030/61904 [8:17:09<17:00:24, 1.33s/it] 26%|██▌ | 16031/61904 [8:17:10<17:22:34, 1.36s/it] 26%|██▌ | 16032/61904 [8:17:12<17:36:39, 1.38s/it] 26%|██▌ | 16033/61904 [8:17:13<17:58:51, 1.41s/it] 26%|██▌ | 16034/61904 [8:17:14<17:38:14, 1.38s/it] 26%|██▌ | 16035/61904 [8:17:16<17:27:51, 1.37s/it] 26%|██▌ | 16036/61904 [8:17:17<17:57:36, 1.41s/it] 26%|██▌ | 16037/61904 [8:17:19<17:22:16, 1.36s/it] 26%|██▌ | 16038/61904 [8:17:20<18:10:18, 1.43s/it] 26%|██▌ | 16039/61904 [8:17:21<17:49:51, 1.40s/it] 26%|██▌ | 16040/61904 [8:17:23<17:28:24, 1.37s/it] {'loss': 2.7433, 'learning_rate': 1.7432905484247375e-07, 'epoch': 4.15} + 26%|██▌ | 16040/61904 [8:17:23<17:28:24, 1.37s/it] 26%|██▌ | 16041/61904 [8:17:24<17:07:09, 1.34s/it] 26%|██▌ | 16042/61904 [8:17:25<17:26:02, 1.37s/it] 26%|██▌ | 16043/61904 [8:17:27<17:10:08, 1.35s/it] 26%|██▌ | 16044/61904 [8:17:28<17:13:39, 1.35s/it] 26%|██▌ | 16045/61904 [8:17:30<17:22:27, 1.36s/it] 26%|██▌ | 16046/61904 [8:17:31<17:07:36, 1.34s/it] 26%|██▌ | 16047/61904 [8:17:32<17:32:06, 1.38s/it] 26%|██▌ | 16048/61904 [8:17:34<17:29:47, 1.37s/it] 26%|██▌ | 16049/61904 [8:17:35<18:00:15, 1.41s/it] 26%|██▌ | 16050/61904 [8:17:37<18:18:45, 1.44s/it] 26%|██▌ | 16051/61904 [8:17:38<17:52:57, 1.40s/it] 26%|██▌ | 16052/61904 [8:17:39<17:21:59, 1.36s/it] 26%|██▌ | 16053/61904 [8:17:41<17:19:30, 1.36s/it] 26%|██▌ | 16054/61904 [8:17:42<17:08:04, 1.35s/it] 26%|██▌ | 16055/61904 [8:17:44<18:17:21, 1.44s/it] 26%|██▌ | 16056/61904 [8:17:45<18:08:12, 1.42s/it] 26%|██▌ | 16057/61904 [8:17:46<17:51:22, 1.40s/it] 26%|██▌ | 16058/61904 [8:17:48<17:38:42, 1.39s/it] 26%|██▌ | 16059/61904 [8:17:49<17:32:28, 1.38s/it] 26%|██▌ | 16060/61904 [8:17:50<17:29:36, 1.37s/it] {'loss': 2.7098, 'learning_rate': 1.742966420329314e-07, 'epoch': 4.15} + 26%|██▌ | 16060/61904 [8:17:50<17:29:36, 1.37s/it] 26%|██▌ | 16061/61904 [8:17:52<16:59:41, 1.33s/it] 26%|██▌ | 16062/61904 [8:17:53<18:00:59, 1.41s/it] 26%|██▌ | 16063/61904 [8:17:55<18:01:50, 1.42s/it] 26%|██▌ | 16064/61904 [8:17:56<17:56:26, 1.41s/it] 26%|██▌ | 16065/61904 [8:17:57<17:28:51, 1.37s/it] 26%|██▌ | 16066/61904 [8:17:59<17:37:33, 1.38s/it] 26%|██▌ | 16067/61904 [8:18:00<17:34:40, 1.38s/it] 26%|██▌ | 16068/61904 [8:18:01<17:24:46, 1.37s/it] 26%|██▌ | 16069/61904 [8:18:03<17:52:10, 1.40s/it] 26%|██▌ | 16070/61904 [8:18:04<17:54:54, 1.41s/it] 26%|██▌ | 16071/61904 [8:18:06<17:48:02, 1.40s/it] 26%|██▌ | 16072/61904 [8:18:07<17:39:10, 1.39s/it] 26%|██▌ | 16073/61904 [8:18:08<17:46:33, 1.40s/it] 26%|██▌ | 16074/61904 [8:18:10<17:41:56, 1.39s/it] 26%|██▌ | 16075/61904 [8:18:11<17:42:48, 1.39s/it] 26%|██▌ | 16076/61904 [8:18:13<17:30:19, 1.38s/it] 26%|██▌ | 16077/61904 [8:18:14<17:03:53, 1.34s/it] 26%|██▌ | 16078/61904 [8:18:15<17:38:09, 1.39s/it] 26%|██▌ | 16079/61904 [8:18:17<17:47:03, 1.40s/it] 26%|██▌ | 16080/61904 [8:18:18<17:47:01, 1.40s/it] {'loss': 2.7675, 'learning_rate': 1.7426422922338907e-07, 'epoch': 4.16} + 26%|██▌ | 16080/61904 [8:18:18<17:47:01, 1.40s/it] 26%|██▌ | 16081/61904 [8:18:20<17:41:24, 1.39s/it] 26%|██▌ | 16082/61904 [8:18:21<17:06:49, 1.34s/it] 26%|██▌ | 16083/61904 [8:18:22<16:55:00, 1.33s/it] 26%|██▌ | 16084/61904 [8:18:23<16:53:41, 1.33s/it] 26%|██▌ | 16085/61904 [8:18:25<17:23:20, 1.37s/it] 26%|██▌ | 16086/61904 [8:18:26<18:13:14, 1.43s/it] 26%|██▌ | 16087/61904 [8:18:28<17:21:53, 1.36s/it] 26%|██▌ | 16088/61904 [8:18:29<17:16:15, 1.36s/it] 26%|██▌ | 16089/61904 [8:18:30<17:03:25, 1.34s/it] 26%|██▌ | 16090/61904 [8:18:32<17:12:13, 1.35s/it] 26%|██▌ | 16091/61904 [8:18:33<16:47:22, 1.32s/it] 26%|██▌ | 16092/61904 [8:18:34<17:10:10, 1.35s/it] 26%|██▌ | 16093/61904 [8:18:36<17:16:12, 1.36s/it] 26%|██▌ | 16094/61904 [8:18:37<17:25:09, 1.37s/it] 26%|██▌ | 16095/61904 [8:18:38<17:07:42, 1.35s/it] 26%|██▌ | 16096/61904 [8:18:40<17:43:44, 1.39s/it] 26%|██▌ | 16097/61904 [8:18:41<17:19:09, 1.36s/it] 26%|██▌ | 16098/61904 [8:18:43<17:26:41, 1.37s/it] 26%|██▌ | 16099/61904 [8:18:44<17:42:36, 1.39s/it] 26%|██▌ | 16100/61904 [8:18:45<17:23:46, 1.37s/it] {'loss': 2.7256, 'learning_rate': 1.7423181641384676e-07, 'epoch': 4.16} + 26%|██▌ | 16100/61904 [8:18:45<17:23:46, 1.37s/it] 26%|██▌ | 16101/61904 [8:18:47<17:23:32, 1.37s/it] 26%|██▌ | 16102/61904 [8:18:48<17:25:08, 1.37s/it] 26%|██▌ | 16103/61904 [8:18:50<17:45:10, 1.40s/it] 26%|██▌ | 16104/61904 [8:18:51<17:56:32, 1.41s/it] 26%|██▌ | 16105/61904 [8:18:52<17:59:22, 1.41s/it] 26%|██▌ | 16106/61904 [8:18:54<17:28:56, 1.37s/it] 26%|██▌ | 16107/61904 [8:18:55<17:41:11, 1.39s/it] 26%|██▌ | 16108/61904 [8:18:56<17:22:13, 1.37s/it] 26%|██▌ | 16109/61904 [8:18:58<17:12:06, 1.35s/it] 26%|██▌ | 16110/61904 [8:18:59<17:14:29, 1.36s/it] 26%|██▌ | 16111/61904 [8:19:00<17:13:54, 1.35s/it] 26%|██▌ | 16112/61904 [8:19:02<17:48:50, 1.40s/it] 26%|██▌ | 16113/61904 [8:19:03<17:22:46, 1.37s/it] 26%|██▌ | 16114/61904 [8:19:05<17:11:57, 1.35s/it] 26%|██▌ | 16115/61904 [8:19:06<16:48:58, 1.32s/it] 26%|██▌ | 16116/61904 [8:19:07<17:26:21, 1.37s/it] 26%|██▌ | 16117/61904 [8:19:09<17:19:24, 1.36s/it] 26%|██▌ | 16118/61904 [8:19:10<17:12:22, 1.35s/it] 26%|██▌ | 16119/61904 [8:19:11<16:51:26, 1.33s/it] 26%|██▌ | 16120/61904 [8:19:13<17:12:41, 1.35s/it] {'loss': 2.7179, 'learning_rate': 1.741994036043044e-07, 'epoch': 4.17} + 26%|██▌ | 16120/61904 [8:19:13<17:12:41, 1.35s/it] 26%|██▌ | 16121/61904 [8:19:14<17:19:47, 1.36s/it] 26%|██▌ | 16122/61904 [8:19:15<17:26:15, 1.37s/it] 26%|██▌ | 16123/61904 [8:19:17<17:08:35, 1.35s/it] 26%|██▌ | 16124/61904 [8:19:18<16:40:24, 1.31s/it] 26%|██▌ | 16125/61904 [8:19:19<16:30:05, 1.30s/it] 26%|██▌ | 16126/61904 [8:19:21<16:43:19, 1.32s/it] 26%|██▌ | 16127/61904 [8:19:22<16:55:15, 1.33s/it] 26%|██▌ | 16128/61904 [8:19:23<17:14:07, 1.36s/it] 26%|██▌ | 16129/61904 [8:19:25<17:31:59, 1.38s/it] 26%|██▌ | 16130/61904 [8:19:26<17:38:23, 1.39s/it] 26%|██▌ | 16131/61904 [8:19:28<18:37:38, 1.47s/it] 26%|██▌ | 16132/61904 [8:19:29<17:51:07, 1.40s/it] 26%|██▌ | 16133/61904 [8:19:31<18:17:50, 1.44s/it] 26%|██▌ | 16134/61904 [8:19:32<17:57:34, 1.41s/it] 26%|██▌ | 16135/61904 [8:19:34<18:29:56, 1.46s/it] 26%|██▌ | 16136/61904 [8:19:35<17:54:10, 1.41s/it] 26%|██▌ | 16137/61904 [8:19:36<17:41:46, 1.39s/it] 26%|██▌ | 16138/61904 [8:19:38<19:03:24, 1.50s/it] 26%|██▌ | 16139/61904 [8:19:39<18:32:43, 1.46s/it] 26%|██▌ | 16140/61904 [8:19:41<18:31:25, 1.46s/it] {'loss': 2.6592, 'learning_rate': 1.7416699079476208e-07, 'epoch': 4.17} + 26%|██▌ | 16140/61904 [8:19:41<18:31:25, 1.46s/it] 26%|██▌ | 16141/61904 [8:19:42<17:46:39, 1.40s/it] 26%|██▌ | 16142/61904 [8:19:43<17:31:58, 1.38s/it] 26%|██▌ | 16143/61904 [8:19:45<17:51:27, 1.40s/it] 26%|██▌ | 16144/61904 [8:19:46<17:20:24, 1.36s/it] 26%|██▌ | 16145/61904 [8:19:47<16:59:27, 1.34s/it] 26%|██▌ | 16146/61904 [8:19:49<16:37:50, 1.31s/it] 26%|██▌ | 16147/61904 [8:19:50<17:05:16, 1.34s/it] 26%|██▌ | 16148/61904 [8:19:51<17:03:27, 1.34s/it] 26%|██▌ | 16149/61904 [8:19:53<16:58:22, 1.34s/it] 26%|██▌ | 16150/61904 [8:19:54<16:49:34, 1.32s/it] 26%|██▌ | 16151/61904 [8:19:55<16:51:47, 1.33s/it] 26%|██▌ | 16152/61904 [8:19:57<17:03:12, 1.34s/it] 26%|██▌ | 16153/61904 [8:19:58<16:47:24, 1.32s/it] 26%|██▌ | 16154/61904 [8:19:59<16:49:17, 1.32s/it] 26%|██▌ | 16155/61904 [8:20:01<16:57:14, 1.33s/it] 26%|██▌ | 16156/61904 [8:20:02<17:57:38, 1.41s/it] 26%|██▌ | 16157/61904 [8:20:04<17:41:14, 1.39s/it] 26%|██▌ | 16158/61904 [8:20:05<17:14:39, 1.36s/it] 26%|██▌ | 16159/61904 [8:20:06<17:53:45, 1.41s/it] 26%|██▌ | 16160/61904 [8:20:08<17:43:44, 1.40s/it] {'loss': 2.7441, 'learning_rate': 1.7413457798521977e-07, 'epoch': 4.18} + 26%|██▌ | 16160/61904 [8:20:08<17:43:44, 1.40s/it] 26%|██▌ | 16161/61904 [8:20:09<17:33:14, 1.38s/it] 26%|██▌ | 16162/61904 [8:20:11<17:49:40, 1.40s/it] 26%|██▌ | 16163/61904 [8:20:12<17:25:57, 1.37s/it] 26%|██▌ | 16164/61904 [8:20:13<17:18:11, 1.36s/it] 26%|██▌ | 16165/61904 [8:20:15<17:27:48, 1.37s/it] 26%|██▌ | 16166/61904 [8:20:16<17:57:03, 1.41s/it] 26%|██▌ | 16167/61904 [8:20:17<17:26:09, 1.37s/it] 26%|██▌ | 16168/61904 [8:20:19<16:50:02, 1.33s/it] 26%|██▌ | 16169/61904 [8:20:20<17:19:23, 1.36s/it] 26%|██▌ | 16170/61904 [8:20:21<17:27:29, 1.37s/it] 26%|██▌ | 16171/61904 [8:20:23<17:07:41, 1.35s/it] 26%|██▌ | 16172/61904 [8:20:24<16:43:07, 1.32s/it] 26%|██▌ | 16173/61904 [8:20:25<16:58:08, 1.34s/it] 26%|██▌ | 16174/61904 [8:20:27<17:30:38, 1.38s/it] 26%|██▌ | 16175/61904 [8:20:28<17:23:35, 1.37s/it] 26%|██▌ | 16176/61904 [8:20:30<17:26:08, 1.37s/it] 26%|██▌ | 16177/61904 [8:20:31<17:21:03, 1.37s/it] 26%|██▌ | 16178/61904 [8:20:32<18:02:49, 1.42s/it] 26%|██▌ | 16179/61904 [8:20:34<17:30:00, 1.38s/it] 26%|██▌ | 16180/61904 [8:20:35<17:59:29, 1.42s/it] {'loss': 2.66, 'learning_rate': 1.741021651756774e-07, 'epoch': 4.18} + 26%|██▌ | 16180/61904 [8:20:35<17:59:29, 1.42s/it] 26%|██▌ | 16181/61904 [8:20:37<18:30:43, 1.46s/it] 26%|██▌ | 16182/61904 [8:20:38<17:52:39, 1.41s/it] 26%|██▌ | 16183/61904 [8:20:39<17:32:23, 1.38s/it] 26%|██▌ | 16184/61904 [8:20:41<17:56:40, 1.41s/it] 26%|██▌ | 16185/61904 [8:20:42<17:33:20, 1.38s/it] 26%|██▌ | 16186/61904 [8:20:44<17:32:00, 1.38s/it] 26%|██▌ | 16187/61904 [8:20:45<17:04:09, 1.34s/it] 26%|██▌ | 16188/61904 [8:20:46<17:10:19, 1.35s/it] 26%|██▌ | 16189/61904 [8:20:47<16:43:46, 1.32s/it] 26%|██▌ | 16190/61904 [8:20:49<16:39:33, 1.31s/it] 26%|██▌ | 16191/61904 [8:20:50<16:49:56, 1.33s/it] 26%|██▌ | 16192/61904 [8:20:51<17:07:07, 1.35s/it] 26%|██▌ | 16193/61904 [8:20:53<17:16:06, 1.36s/it] 26%|██▌ | 16194/61904 [8:20:54<17:44:20, 1.40s/it] 26%|██▌ | 16195/61904 [8:20:56<17:59:25, 1.42s/it] 26%|██▌ | 16196/61904 [8:20:57<17:59:25, 1.42s/it] 26%|██▌ | 16197/61904 [8:20:59<18:01:59, 1.42s/it] 26%|██▌ | 16198/61904 [8:21:00<17:45:06, 1.40s/it] 26%|██▌ | 16199/61904 [8:21:01<17:49:08, 1.40s/it] 26%|██▌ | 16200/61904 [8:21:03<17:58:04, 1.42s/it] {'loss': 2.687, 'learning_rate': 1.740697523661351e-07, 'epoch': 4.19} + 26%|██▌ | 16200/61904 [8:21:03<17:58:04, 1.42s/it] 26%|██▌ | 16201/61904 [8:21:04<17:16:41, 1.36s/it] 26%|██▌ | 16202/61904 [8:21:06<17:30:33, 1.38s/it] 26%|██▌ | 16203/61904 [8:21:07<17:46:42, 1.40s/it] 26%|██▌ | 16204/61904 [8:21:08<17:06:05, 1.35s/it] 26%|██▌ | 16205/61904 [8:21:10<17:20:09, 1.37s/it] 26%|██▌ | 16206/61904 [8:21:11<17:32:19, 1.38s/it] 26%|██▌ | 16207/61904 [8:21:12<17:20:33, 1.37s/it] 26%|██▌ | 16208/61904 [8:21:14<17:07:18, 1.35s/it] 26%|██▌ | 16209/61904 [8:21:15<17:19:01, 1.36s/it] 26%|██▌ | 16210/61904 [8:21:16<16:55:41, 1.33s/it] 26%|██▌ | 16211/61904 [8:21:18<17:39:35, 1.39s/it] 26%|██▌ | 16212/61904 [8:21:19<17:42:40, 1.40s/it] 26%|██▌ | 16213/61904 [8:21:21<17:32:21, 1.38s/it] 26%|██▌ | 16214/61904 [8:21:22<17:13:31, 1.36s/it] 26%|██▌ | 16215/61904 [8:21:23<17:39:17, 1.39s/it] 26%|██▌ | 16216/61904 [8:21:25<18:58:04, 1.49s/it] 26%|██▌ | 16217/61904 [8:21:26<18:20:51, 1.45s/it] 26%|██▌ | 16218/61904 [8:21:28<17:37:08, 1.39s/it] 26%|██▌ | 16219/61904 [8:21:29<17:23:22, 1.37s/it] 26%|██▌ | 16220/61904 [8:21:30<17:38:49, 1.39s/it] {'loss': 2.7227, 'learning_rate': 1.7403733955659276e-07, 'epoch': 4.19} + 26%|██▌ | 16220/61904 [8:21:30<17:38:49, 1.39s/it] 26%|██▌ | 16221/61904 [8:21:32<17:16:29, 1.36s/it] 26%|██▌ | 16222/61904 [8:21:33<17:30:16, 1.38s/it] 26%|██▌ | 16223/61904 [8:21:35<17:41:50, 1.39s/it] 26%|██▌ | 16224/61904 [8:21:36<17:52:31, 1.41s/it] 26%|██▌ | 16225/61904 [8:21:37<17:24:16, 1.37s/it] 26%|██▌ | 16226/61904 [8:21:39<17:03:56, 1.34s/it] 26%|██▌ | 16227/61904 [8:21:40<17:09:03, 1.35s/it] 26%|██▌ | 16228/61904 [8:21:41<17:19:19, 1.37s/it] 26%|██▌ | 16229/61904 [8:21:43<16:55:23, 1.33s/it] 26%|██▌ | 16230/61904 [8:21:44<16:36:02, 1.31s/it] 26%|██▌ | 16231/61904 [8:21:45<17:13:10, 1.36s/it] 26%|██▌ | 16232/61904 [8:21:47<17:27:57, 1.38s/it] 26%|██▌ | 16233/61904 [8:21:48<16:54:13, 1.33s/it] 26%|██▌ | 16234/61904 [8:21:49<16:55:06, 1.33s/it] 26%|██▌ | 16235/61904 [8:21:51<17:15:22, 1.36s/it] 26%|██▌ | 16236/61904 [8:21:52<17:39:17, 1.39s/it] 26%|██▌ | 16237/61904 [8:21:54<17:21:41, 1.37s/it] 26%|██▌ | 16238/61904 [8:21:55<16:44:14, 1.32s/it] 26%|██▌ | 16239/61904 [8:21:56<16:54:59, 1.33s/it] 26%|██▌ | 16240/61904 [8:21:58<17:14:26, 1.36s/it] {'loss': 2.7591, 'learning_rate': 1.7400492674705042e-07, 'epoch': 4.2} + 26%|██▌ | 16240/61904 [8:21:58<17:14:26, 1.36s/it] 26%|██▌ | 16241/61904 [8:21:59<16:51:54, 1.33s/it] 26%|██▌ | 16242/61904 [8:22:00<16:44:52, 1.32s/it] 26%|██▌ | 16243/61904 [8:22:01<16:38:27, 1.31s/it] 26%|██▌ | 16244/61904 [8:22:03<17:09:16, 1.35s/it] 26%|██▌ | 16245/61904 [8:22:04<16:57:50, 1.34s/it] 26%|██▌ | 16246/61904 [8:22:05<16:53:45, 1.33s/it] 26%|██▌ | 16247/61904 [8:22:07<16:57:42, 1.34s/it] 26%|██▌ | 16248/61904 [8:22:08<17:00:47, 1.34s/it] 26%|██▌ | 16249/61904 [8:22:10<17:19:41, 1.37s/it] 26%|██▋ | 16250/61904 [8:22:11<17:14:05, 1.36s/it] 26%|██▋ | 16251/61904 [8:22:12<17:18:12, 1.36s/it] 26%|██▋ | 16252/61904 [8:22:14<16:59:53, 1.34s/it] 26%|██▋ | 16253/61904 [8:22:15<17:11:54, 1.36s/it] 26%|██▋ | 16254/61904 [8:22:16<16:52:33, 1.33s/it] 26%|██▋ | 16255/61904 [8:22:18<16:41:40, 1.32s/it] 26%|██▋ | 16256/61904 [8:22:19<16:19:39, 1.29s/it] 26%|██▋ | 16257/61904 [8:22:20<17:02:53, 1.34s/it] 26%|██▋ | 16258/61904 [8:22:22<17:05:42, 1.35s/it] 26%|██▋ | 16259/61904 [8:22:23<17:13:32, 1.36s/it] 26%|██▋ | 16260/61904 [8:22:24<17:45:47, 1.40s/it] {'loss': 2.7063, 'learning_rate': 1.739725139375081e-07, 'epoch': 4.2} + 26%|██▋ | 16260/61904 [8:22:24<17:45:47, 1.40s/it] 26%|██▋ | 16261/61904 [8:22:26<17:22:07, 1.37s/it] 26%|██▋ | 16262/61904 [8:22:27<16:50:01, 1.33s/it] 26%|██▋ | 16263/61904 [8:22:28<16:47:55, 1.33s/it] 26%|██▋ | 16264/61904 [8:22:30<17:01:34, 1.34s/it] 26%|██▋ | 16265/61904 [8:22:31<17:09:37, 1.35s/it] 26%|██▋ | 16266/61904 [8:22:32<17:10:01, 1.35s/it] 26%|██▋ | 16267/61904 [8:22:34<17:08:23, 1.35s/it] 26%|██▋ | 16268/61904 [8:22:35<18:07:22, 1.43s/it] 26%|██▋ | 16269/61904 [8:22:37<17:48:47, 1.41s/it] 26%|██▋ | 16270/61904 [8:22:38<17:56:41, 1.42s/it] 26%|██▋ | 16271/61904 [8:22:40<17:41:08, 1.40s/it] 26%|██▋ | 16272/61904 [8:22:41<17:21:17, 1.37s/it] 26%|██▋ | 16273/61904 [8:22:42<17:27:29, 1.38s/it] 26%|██▋ | 16274/61904 [8:22:44<17:21:22, 1.37s/it] 26%|██▋ | 16275/61904 [8:22:45<18:19:33, 1.45s/it] 26%|██▋ | 16276/61904 [8:22:47<18:53:48, 1.49s/it] 26%|██▋ | 16277/61904 [8:22:48<18:16:28, 1.44s/it] 26%|██▋ | 16278/61904 [8:22:50<18:06:04, 1.43s/it] 26%|██▋ | 16279/61904 [8:22:51<17:59:22, 1.42s/it] 26%|██▋ | 16280/61904 [8:22:52<17:24:31, 1.37s/it] {'loss': 2.7098, 'learning_rate': 1.7394010112796577e-07, 'epoch': 4.21} + 26%|██▋ | 16280/61904 [8:22:52<17:24:31, 1.37s/it] 26%|██▋ | 16281/61904 [8:22:54<17:11:42, 1.36s/it] 26%|██▋ | 16282/61904 [8:22:55<16:35:40, 1.31s/it] 26%|██▋ | 16283/61904 [8:22:56<17:40:03, 1.39s/it] 26%|██▋ | 16284/61904 [8:22:58<17:06:59, 1.35s/it] 26%|██▋ | 16285/61904 [8:22:59<17:57:09, 1.42s/it] 26%|██▋ | 16286/61904 [8:23:00<17:23:59, 1.37s/it] 26%|██▋ | 16287/61904 [8:23:02<17:15:22, 1.36s/it] 26%|██▋ | 16288/61904 [8:23:03<17:25:48, 1.38s/it] 26%|██▋ | 16289/61904 [8:23:05<17:39:13, 1.39s/it] 26%|██▋ | 16290/61904 [8:23:06<17:05:41, 1.35s/it] 26%|██▋ | 16291/61904 [8:23:07<17:21:46, 1.37s/it] 26%|██▋ | 16292/61904 [8:23:09<17:02:45, 1.35s/it] 26%|██▋ | 16293/61904 [8:23:10<16:36:06, 1.31s/it] 26%|██▋ | 16294/61904 [8:23:11<17:06:52, 1.35s/it] 26%|██▋ | 16295/61904 [8:23:13<16:53:44, 1.33s/it] 26%|██▋ | 16296/61904 [8:23:14<18:08:04, 1.43s/it] 26%|██▋ | 16297/61904 [8:23:15<17:45:14, 1.40s/it] 26%|██▋ | 16298/61904 [8:23:17<17:50:44, 1.41s/it] 26%|██▋ | 16299/61904 [8:23:18<18:08:26, 1.43s/it] 26%|██▋ | 16300/61904 [8:23:20<17:30:39, 1.38s/it] {'loss': 2.7066, 'learning_rate': 1.7390768831842343e-07, 'epoch': 4.21} + 26%|██▋ | 16300/61904 [8:23:20<17:30:39, 1.38s/it] 26%|██▋ | 16301/61904 [8:23:21<16:45:40, 1.32s/it] 26%|██▋ | 16302/61904 [8:23:22<16:55:54, 1.34s/it] 26%|██▋ | 16303/61904 [8:23:24<16:47:47, 1.33s/it] 26%|██▋ | 16304/61904 [8:23:25<16:19:14, 1.29s/it] 26%|██▋ | 16305/61904 [8:23:26<16:09:40, 1.28s/it] 26%|██▋ | 16306/61904 [8:23:27<15:53:13, 1.25s/it] 26%|██▋ | 16307/61904 [8:23:29<17:01:06, 1.34s/it] 26%|██▋ | 16308/61904 [8:23:30<17:05:46, 1.35s/it] 26%|██▋ | 16309/61904 [8:23:31<16:38:30, 1.31s/it] 26%|██▋ | 16310/61904 [8:23:33<16:52:55, 1.33s/it] 26%|██▋ | 16311/61904 [8:23:34<16:26:18, 1.30s/it] 26%|██▋ | 16312/61904 [8:23:35<16:49:37, 1.33s/it] 26%|██▋ | 16313/61904 [8:23:37<16:50:22, 1.33s/it] 26%|██▋ | 16314/61904 [8:23:38<16:58:56, 1.34s/it] 26%|██▋ | 16315/61904 [8:23:39<16:41:17, 1.32s/it] 26%|██▋ | 16316/61904 [8:23:41<17:42:18, 1.40s/it] 26%|██▋ | 16317/61904 [8:23:42<17:29:32, 1.38s/it] 26%|██▋ | 16318/61904 [8:23:44<17:27:35, 1.38s/it] 26%|██▋ | 16319/61904 [8:23:45<17:25:21, 1.38s/it] 26%|██▋ | 16320/61904 [8:23:47<18:04:57, 1.43s/it] {'loss': 2.7878, 'learning_rate': 1.7387527550888112e-07, 'epoch': 4.22} + 26%|██▋ | 16320/61904 [8:23:47<18:04:57, 1.43s/it] 26%|██▋ | 16321/61904 [8:23:48<17:51:57, 1.41s/it] 26%|██▋ | 16322/61904 [8:23:49<17:55:06, 1.42s/it] 26%|██▋ | 16323/61904 [8:23:51<18:09:38, 1.43s/it] 26%|██▋ | 16324/61904 [8:23:52<17:47:33, 1.41s/it] 26%|██▋ | 16325/61904 [8:23:53<17:31:32, 1.38s/it] 26%|██▋ | 16326/61904 [8:23:55<17:53:56, 1.41s/it] 26%|██▋ | 16327/61904 [8:23:56<17:39:07, 1.39s/it] 26%|██▋ | 16328/61904 [8:23:58<17:11:32, 1.36s/it] 26%|██▋ | 16329/61904 [8:23:59<17:10:01, 1.36s/it] 26%|██▋ | 16330/61904 [8:24:00<17:12:47, 1.36s/it] 26%|██▋ | 16331/61904 [8:24:02<17:10:19, 1.36s/it] 26%|██▋ | 16332/61904 [8:24:03<17:31:48, 1.38s/it] 26%|██▋ | 16333/61904 [8:24:05<17:49:43, 1.41s/it] 26%|██▋ | 16334/61904 [8:24:06<17:48:37, 1.41s/it] 26%|██▋ | 16335/61904 [8:24:07<18:05:18, 1.43s/it] 26%|██▋ | 16336/61904 [8:24:09<17:28:05, 1.38s/it] 26%|██▋ | 16337/61904 [8:24:10<18:23:36, 1.45s/it] 26%|██▋ | 16338/61904 [8:24:12<18:09:56, 1.44s/it] 26%|██▋ | 16339/61904 [8:24:13<17:29:26, 1.38s/it] 26%|██▋ | 16340/61904 [8:24:14<17:15:42, 1.36s/it] {'loss': 2.7026, 'learning_rate': 1.7384286269933876e-07, 'epoch': 4.22} + 26%|██▋ | 16340/61904 [8:24:14<17:15:42, 1.36s/it] 26%|██▋ | 16341/61904 [8:24:15<16:35:47, 1.31s/it] 26%|██▋ | 16342/61904 [8:24:17<16:48:54, 1.33s/it] 26%|██▋ | 16343/61904 [8:24:18<16:50:12, 1.33s/it] 26%|██▋ | 16344/61904 [8:24:20<16:49:10, 1.33s/it] 26%|██▋ | 16345/61904 [8:24:21<16:53:40, 1.33s/it] 26%|██▋ | 16346/61904 [8:24:22<17:18:08, 1.37s/it] 26%|██▋ | 16347/61904 [8:24:24<17:24:09, 1.38s/it] 26%|██▋ | 16348/61904 [8:24:25<17:12:43, 1.36s/it] 26%|██▋ | 16349/61904 [8:24:26<17:36:49, 1.39s/it] 26%|██▋ | 16350/61904 [8:24:28<17:39:07, 1.39s/it] 26%|██▋ | 16351/61904 [8:24:29<16:51:39, 1.33s/it] 26%|██▋ | 16352/61904 [8:24:30<16:51:31, 1.33s/it] 26%|██▋ | 16353/61904 [8:24:32<17:23:13, 1.37s/it] 26%|██▋ | 16354/61904 [8:24:33<17:24:16, 1.38s/it] 26%|██▋ | 16355/61904 [8:24:35<17:12:32, 1.36s/it] 26%|██▋ | 16356/61904 [8:24:36<16:54:08, 1.34s/it] 26%|██▋ | 16357/61904 [8:24:37<16:31:18, 1.31s/it] 26%|██▋ | 16358/61904 [8:24:38<16:52:14, 1.33s/it] 26%|██▋ | 16359/61904 [8:24:40<17:12:22, 1.36s/it] 26%|██▋ | 16360/61904 [8:24:41<17:10:33, 1.36s/it] {'loss': 2.739, 'learning_rate': 1.7381044988979644e-07, 'epoch': 4.23} + 26%|██▋ | 16360/61904 [8:24:41<17:10:33, 1.36s/it] 26%|██▋ | 16361/61904 [8:24:43<17:25:37, 1.38s/it] 26%|██▋ | 16362/61904 [8:24:44<17:20:56, 1.37s/it] 26%|██▋ | 16363/61904 [8:24:45<17:26:51, 1.38s/it] 26%|██▋ | 16364/61904 [8:24:47<17:37:58, 1.39s/it] 26%|██▋ | 16365/61904 [8:24:48<17:52:49, 1.41s/it] 26%|██▋ | 16366/61904 [8:24:50<17:56:07, 1.42s/it] 26%|██▋ | 16367/61904 [8:24:51<17:35:20, 1.39s/it] 26%|██▋ | 16368/61904 [8:24:52<17:20:00, 1.37s/it] 26%|██▋ | 16369/61904 [8:24:54<17:21:07, 1.37s/it] 26%|██▋ | 16370/61904 [8:24:55<17:46:31, 1.41s/it] 26%|██▋ | 16371/61904 [8:24:57<17:30:25, 1.38s/it] 26%|██▋ | 16372/61904 [8:24:58<17:50:58, 1.41s/it] 26%|██▋ | 16373/61904 [8:24:59<17:27:45, 1.38s/it] 26%|██▋ | 16374/61904 [8:25:01<17:28:02, 1.38s/it] 26%|██▋ | 16375/61904 [8:25:02<17:43:14, 1.40s/it] 26%|██▋ | 16376/61904 [8:25:04<17:18:13, 1.37s/it] 26%|██▋ | 16377/61904 [8:25:05<17:18:41, 1.37s/it] 26%|██▋ | 16378/61904 [8:25:06<17:06:56, 1.35s/it] 26%|██▋ | 16379/61904 [8:25:08<17:22:26, 1.37s/it] 26%|██▋ | 16380/61904 [8:25:09<17:15:55, 1.37s/it] {'loss': 2.7281, 'learning_rate': 1.7377803708025413e-07, 'epoch': 4.23} + 26%|██▋ | 16380/61904 [8:25:09<17:15:55, 1.37s/it] 26%|██▋ | 16381/61904 [8:25:10<17:18:26, 1.37s/it] 26%|██▋ | 16382/61904 [8:25:12<17:05:50, 1.35s/it] 26%|██▋ | 16383/61904 [8:25:13<17:09:05, 1.36s/it] 26%|██▋ | 16384/61904 [8:25:14<17:05:14, 1.35s/it] 26%|██▋ | 16385/61904 [8:25:16<17:25:28, 1.38s/it] 26%|██▋ | 16386/61904 [8:25:17<16:51:54, 1.33s/it] 26%|██▋ | 16387/61904 [8:25:18<16:26:20, 1.30s/it] 26%|██▋ | 16388/61904 [8:25:19<15:59:57, 1.27s/it] 26%|██▋ | 16389/61904 [8:25:21<16:17:31, 1.29s/it] 26%|██▋ | 16390/61904 [8:25:22<17:01:58, 1.35s/it] 26%|██▋ | 16391/61904 [8:25:24<17:19:45, 1.37s/it] 26%|██▋ | 16392/61904 [8:25:25<17:27:52, 1.38s/it] 26%|██▋ | 16393/61904 [8:25:26<17:21:59, 1.37s/it] 26%|██▋ | 16394/61904 [8:25:28<16:46:16, 1.33s/it] 26%|██▋ | 16395/61904 [8:25:29<16:59:12, 1.34s/it] 26%|██▋ | 16396/61904 [8:25:30<17:11:33, 1.36s/it] 26%|██▋ | 16397/61904 [8:25:32<17:02:39, 1.35s/it] 26%|██▋ | 16398/61904 [8:25:33<16:45:33, 1.33s/it] 26%|██▋ | 16399/61904 [8:25:34<16:23:16, 1.30s/it] 26%|██▋ | 16400/61904 [8:25:36<16:50:31, 1.33s/it] {'loss': 2.7053, 'learning_rate': 1.7374562427071177e-07, 'epoch': 4.24} + 26%|██▋ | 16400/61904 [8:25:36<16:50:31, 1.33s/it] 26%|██▋ | 16401/61904 [8:25:37<17:14:04, 1.36s/it] 26%|██▋ | 16402/61904 [8:25:38<16:51:59, 1.33s/it] 26%|██▋ | 16403/61904 [8:25:40<16:51:13, 1.33s/it] 26%|██▋ | 16404/61904 [8:25:41<17:04:42, 1.35s/it] 27%|██▋ | 16405/61904 [8:25:42<16:56:47, 1.34s/it] 27%|██▋ | 16406/61904 [8:25:44<17:13:17, 1.36s/it] 27%|██▋ | 16407/61904 [8:25:45<17:02:22, 1.35s/it] 27%|██▋ | 16408/61904 [8:25:47<18:19:54, 1.45s/it] 27%|██▋ | 16409/61904 [8:25:48<18:55:37, 1.50s/it] 27%|██▋ | 16410/61904 [8:25:50<18:49:30, 1.49s/it] 27%|██▋ | 16411/61904 [8:25:51<18:32:01, 1.47s/it] 27%|██▋ | 16412/61904 [8:25:53<18:15:50, 1.45s/it] 27%|██▋ | 16413/61904 [8:25:54<18:12:23, 1.44s/it] 27%|██▋ | 16414/61904 [8:25:56<17:58:29, 1.42s/it] 27%|██▋ | 16415/61904 [8:25:57<17:50:55, 1.41s/it] 27%|██▋ | 16416/61904 [8:25:58<17:41:23, 1.40s/it] 27%|██▋ | 16417/61904 [8:26:00<17:27:10, 1.38s/it] 27%|██▋ | 16418/61904 [8:26:01<17:34:44, 1.39s/it] 27%|██▋ | 16419/61904 [8:26:02<17:10:00, 1.36s/it] 27%|██▋ | 16420/61904 [8:26:04<17:18:44, 1.37s/it] {'loss': 2.6969, 'learning_rate': 1.7371321146116946e-07, 'epoch': 4.24} + 27%|██▋ | 16420/61904 [8:26:04<17:18:44, 1.37s/it] 27%|██▋ | 16421/61904 [8:26:05<17:36:52, 1.39s/it] 27%|██▋ | 16422/61904 [8:26:06<17:09:11, 1.36s/it] 27%|██▋ | 16423/61904 [8:26:08<16:59:06, 1.34s/it] 27%|██▋ | 16424/61904 [8:26:09<16:29:42, 1.31s/it] 27%|██▋ | 16425/61904 [8:26:10<16:49:26, 1.33s/it] 27%|██▋ | 16426/61904 [8:26:12<17:22:36, 1.38s/it] 27%|██▋ | 16427/61904 [8:26:13<16:38:41, 1.32s/it] 27%|██▋ | 16428/61904 [8:26:14<16:48:28, 1.33s/it] 27%|██▋ | 16429/61904 [8:26:16<17:06:17, 1.35s/it] 27%|██▋ | 16430/61904 [8:26:17<17:07:02, 1.36s/it] 27%|██▋ | 16431/61904 [8:26:18<17:01:18, 1.35s/it] 27%|██▋ | 16432/61904 [8:26:20<17:06:20, 1.35s/it] 27%|██▋ | 16433/61904 [8:26:21<16:52:21, 1.34s/it] 27%|██▋ | 16434/61904 [8:26:23<18:03:10, 1.43s/it] 27%|██▋ | 16435/61904 [8:26:24<17:38:21, 1.40s/it] 27%|██▋ | 16436/61904 [8:26:26<17:53:09, 1.42s/it] 27%|██▋ | 16437/61904 [8:26:27<17:37:39, 1.40s/it] 27%|██▋ | 16438/61904 [8:26:28<17:27:00, 1.38s/it] 27%|██▋ | 16439/61904 [8:26:30<17:36:43, 1.39s/it] 27%|██▋ | 16440/61904 [8:26:31<17:52:39, 1.42s/it] {'loss': 2.6516, 'learning_rate': 1.7368079865162712e-07, 'epoch': 4.25} + 27%|██▋ | 16440/61904 [8:26:31<17:52:39, 1.42s/it] 27%|██▋ | 16441/61904 [8:26:32<17:13:02, 1.36s/it] 27%|██▋ | 16442/61904 [8:26:34<17:20:25, 1.37s/it] 27%|██▋ | 16443/61904 [8:26:35<17:30:19, 1.39s/it] 27%|██▋ | 16444/61904 [8:26:37<17:34:14, 1.39s/it] 27%|██▋ | 16445/61904 [8:26:38<17:26:17, 1.38s/it] 27%|██▋ | 16446/61904 [8:26:39<17:33:19, 1.39s/it] 27%|██▋ | 16447/61904 [8:26:41<17:28:16, 1.38s/it] 27%|██▋ | 16448/61904 [8:26:42<17:23:20, 1.38s/it] 27%|██▋ | 16449/61904 [8:26:43<17:12:26, 1.36s/it] 27%|██▋ | 16450/61904 [8:26:45<17:05:34, 1.35s/it] 27%|██▋ | 16451/61904 [8:26:46<17:27:19, 1.38s/it] 27%|██▋ | 16452/61904 [8:26:48<17:18:11, 1.37s/it] 27%|██▋ | 16453/61904 [8:26:49<17:33:08, 1.39s/it] 27%|██▋ | 16454/61904 [8:26:50<17:18:10, 1.37s/it] 27%|██▋ | 16455/61904 [8:26:52<17:14:25, 1.37s/it] 27%|██▋ | 16456/61904 [8:26:53<17:27:18, 1.38s/it] 27%|██▋ | 16457/61904 [8:26:55<17:33:52, 1.39s/it] 27%|██▋ | 16458/61904 [8:26:56<17:51:16, 1.41s/it] 27%|██▋ | 16459/61904 [8:26:57<17:50:11, 1.41s/it] 27%|██▋ | 16460/61904 [8:26:59<17:11:59, 1.36s/it] {'loss': 2.7487, 'learning_rate': 1.7364838584208478e-07, 'epoch': 4.25} + 27%|██▋ | 16460/61904 [8:26:59<17:11:59, 1.36s/it] 27%|██▋ | 16461/61904 [8:27:00<17:35:49, 1.39s/it] 27%|██▋ | 16462/61904 [8:27:02<18:06:55, 1.44s/it] 27%|██▋ | 16463/61904 [8:27:03<17:48:49, 1.41s/it] 27%|██▋ | 16464/61904 [8:27:04<17:27:00, 1.38s/it] 27%|██▋ | 16465/61904 [8:27:06<17:09:05, 1.36s/it] 27%|██▋ | 16466/61904 [8:27:07<17:18:23, 1.37s/it] 27%|██▋ | 16467/61904 [8:27:08<17:26:40, 1.38s/it] 27%|██▋ | 16468/61904 [8:27:10<17:03:46, 1.35s/it] 27%|██▋ | 16469/61904 [8:27:11<16:43:20, 1.32s/it] 27%|██▋ | 16470/61904 [8:27:12<17:06:48, 1.36s/it] 27%|██▋ | 16471/61904 [8:27:14<16:28:26, 1.31s/it] 27%|██▋ | 16472/61904 [8:27:15<16:36:40, 1.32s/it] 27%|██▋ | 16473/61904 [8:27:16<16:42:55, 1.32s/it] 27%|██▋ | 16474/61904 [8:27:18<17:08:51, 1.36s/it] 27%|██▋ | 16475/61904 [8:27:19<17:09:12, 1.36s/it] 27%|██▋ | 16476/61904 [8:27:20<17:15:22, 1.37s/it] 27%|██▋ | 16477/61904 [8:27:22<17:15:08, 1.37s/it] 27%|██▋ | 16478/61904 [8:27:23<16:56:31, 1.34s/it] 27%|██▋ | 16479/61904 [8:27:25<17:36:35, 1.40s/it] 27%|██▋ | 16480/61904 [8:27:26<17:42:42, 1.40s/it] {'loss': 2.7162, 'learning_rate': 1.7361597303254247e-07, 'epoch': 4.26} + 27%|██▋ | 16480/61904 [8:27:26<17:42:42, 1.40s/it] 27%|██▋ | 16481/61904 [8:27:27<16:56:15, 1.34s/it] 27%|██▋ | 16482/61904 [8:27:29<16:56:10, 1.34s/it] 27%|██▋ | 16483/61904 [8:27:30<17:18:56, 1.37s/it] 27%|██▋ | 16484/61904 [8:27:31<17:26:15, 1.38s/it] 27%|██▋ | 16485/61904 [8:27:33<17:37:54, 1.40s/it] 27%|██▋ | 16486/61904 [8:27:34<17:33:34, 1.39s/it] 27%|██▋ | 16487/61904 [8:27:36<17:36:28, 1.40s/it] 27%|██▋ | 16488/61904 [8:27:37<17:19:20, 1.37s/it] 27%|██▋ | 16489/61904 [8:27:38<17:08:23, 1.36s/it] 27%|██▋ | 16490/61904 [8:27:40<16:50:34, 1.34s/it] 27%|██▋ | 16491/61904 [8:27:41<17:34:48, 1.39s/it] 27%|██▋ | 16492/61904 [8:27:43<18:12:14, 1.44s/it] 27%|██▋ | 16493/61904 [8:27:44<17:29:17, 1.39s/it] 27%|██▋ | 16494/61904 [8:27:45<16:56:42, 1.34s/it] 27%|██▋ | 16495/61904 [8:27:47<17:44:08, 1.41s/it] 27%|██▋ | 16496/61904 [8:27:48<17:20:08, 1.37s/it] 27%|██▋ | 16497/61904 [8:27:49<17:25:35, 1.38s/it] 27%|██▋ | 16498/61904 [8:27:51<17:07:17, 1.36s/it] 27%|██▋ | 16499/61904 [8:27:52<16:59:14, 1.35s/it] 27%|██▋ | 16500/61904 [8:27:53<17:15:26, 1.37s/it] {'loss': 2.7143, 'learning_rate': 1.735835602230001e-07, 'epoch': 4.26} + 27%|██▋ | 16500/61904 [8:27:53<17:15:26, 1.37s/it] 27%|██▋ | 16501/61904 [8:27:55<17:00:10, 1.35s/it] 27%|██▋ | 16502/61904 [8:27:56<17:01:37, 1.35s/it] 27%|██▋ | 16503/61904 [8:27:57<17:03:36, 1.35s/it] 27%|██▋ | 16504/61904 [8:27:59<17:07:41, 1.36s/it] 27%|██▋ | 16505/61904 [8:28:00<17:25:17, 1.38s/it] 27%|██▋ | 16506/61904 [8:28:02<17:52:26, 1.42s/it] 27%|██▋ | 16507/61904 [8:28:03<17:34:53, 1.39s/it] 27%|██▋ | 16508/61904 [8:28:04<17:25:29, 1.38s/it] 27%|██▋ | 16509/61904 [8:28:06<17:30:02, 1.39s/it] 27%|██▋ | 16510/61904 [8:28:07<16:55:40, 1.34s/it] 27%|██▋ | 16511/61904 [8:28:08<16:35:59, 1.32s/it] 27%|██▋ | 16512/61904 [8:28:10<16:38:08, 1.32s/it] 27%|██▋ | 16513/61904 [8:28:11<16:32:44, 1.31s/it] 27%|██▋ | 16514/61904 [8:28:12<17:03:11, 1.35s/it] 27%|██▋ | 16515/61904 [8:28:14<17:13:17, 1.37s/it] 27%|██▋ | 16516/61904 [8:28:15<17:08:25, 1.36s/it] 27%|██▋ | 16517/61904 [8:28:16<16:48:42, 1.33s/it] 27%|██▋ | 16518/61904 [8:28:18<16:50:52, 1.34s/it] 27%|██▋ | 16519/61904 [8:28:19<17:22:51, 1.38s/it] 27%|██▋ | 16520/61904 [8:28:21<17:20:05, 1.38s/it] {'loss': 2.7663, 'learning_rate': 1.735511474134578e-07, 'epoch': 4.27} + 27%|██▋ | 16520/61904 [8:28:21<17:20:05, 1.38s/it] 27%|██▋ | 16521/61904 [8:28:22<17:09:05, 1.36s/it] 27%|██▋ | 16522/61904 [8:28:24<17:58:18, 1.43s/it] 27%|██▋ | 16523/61904 [8:28:25<17:32:22, 1.39s/it] 27%|██▋ | 16524/61904 [8:28:26<17:06:15, 1.36s/it] 27%|██▋ | 16525/61904 [8:28:27<17:00:52, 1.35s/it] 27%|██▋ | 16526/61904 [8:28:29<17:07:28, 1.36s/it] 27%|██▋ | 16527/61904 [8:28:30<16:21:10, 1.30s/it] 27%|██▋ | 16528/61904 [8:28:31<16:31:11, 1.31s/it] 27%|██▋ | 16529/61904 [8:28:33<17:38:44, 1.40s/it] 27%|██▋ | 16530/61904 [8:28:34<17:36:37, 1.40s/it] 27%|██▋ | 16531/61904 [8:28:36<17:29:36, 1.39s/it] 27%|██▋ | 16532/61904 [8:28:37<17:16:09, 1.37s/it] 27%|██▋ | 16533/61904 [8:28:39<17:35:07, 1.40s/it] 27%|██▋ | 16534/61904 [8:28:40<17:47:24, 1.41s/it] 27%|██▋ | 16535/61904 [8:28:41<17:10:31, 1.36s/it] 27%|██▋ | 16536/61904 [8:28:43<17:04:56, 1.36s/it] 27%|██▋ | 16537/61904 [8:28:44<16:58:27, 1.35s/it] 27%|██▋ | 16538/61904 [8:28:45<17:24:17, 1.38s/it] 27%|██▋ | 16539/61904 [8:28:47<17:25:06, 1.38s/it] 27%|██▋ | 16540/61904 [8:28:48<17:34:37, 1.39s/it] {'loss': 2.7353, 'learning_rate': 1.7351873460391548e-07, 'epoch': 4.27} + 27%|██▋ | 16540/61904 [8:28:48<17:34:37, 1.39s/it] 27%|██▋ | 16541/61904 [8:28:50<17:43:21, 1.41s/it] 27%|██▋ | 16542/61904 [8:28:51<17:35:38, 1.40s/it] 27%|██▋ | 16543/61904 [8:28:52<17:11:39, 1.36s/it] 27%|██▋ | 16544/61904 [8:28:54<17:26:52, 1.38s/it] 27%|██▋ | 16545/61904 [8:28:55<17:08:14, 1.36s/it] 27%|██▋ | 16546/61904 [8:28:56<17:25:37, 1.38s/it] 27%|██▋ | 16547/61904 [8:28:58<17:39:21, 1.40s/it] 27%|██▋ | 16548/61904 [8:28:59<18:13:40, 1.45s/it] 27%|██▋ | 16549/61904 [8:29:01<17:42:23, 1.41s/it] 27%|██▋ | 16550/61904 [8:29:02<17:56:41, 1.42s/it] 27%|██▋ | 16551/61904 [8:29:04<17:36:01, 1.40s/it] 27%|██▋ | 16552/61904 [8:29:05<17:40:14, 1.40s/it] 27%|██▋ | 16553/61904 [8:29:06<17:24:24, 1.38s/it] 27%|██▋ | 16554/61904 [8:29:08<17:08:39, 1.36s/it] 27%|██▋ | 16555/61904 [8:29:09<17:10:49, 1.36s/it] 27%|██▋ | 16556/61904 [8:29:10<16:33:39, 1.31s/it] 27%|██▋ | 16557/61904 [8:29:12<16:52:12, 1.34s/it] 27%|██▋ | 16558/61904 [8:29:13<17:07:16, 1.36s/it] 27%|██▋ | 16559/61904 [8:29:14<16:58:59, 1.35s/it] 27%|██▋ | 16560/61904 [8:29:16<16:52:31, 1.34s/it] {'loss': 2.7334, 'learning_rate': 1.7348632179437312e-07, 'epoch': 4.28} + 27%|██▋ | 16560/61904 [8:29:16<16:52:31, 1.34s/it] 27%|██▋ | 16561/61904 [8:29:17<16:28:04, 1.31s/it] 27%|██▋ | 16562/61904 [8:29:18<16:34:59, 1.32s/it] 27%|██▋ | 16563/61904 [8:29:19<16:37:08, 1.32s/it] 27%|██▋ | 16564/61904 [8:29:21<16:44:54, 1.33s/it] 27%|██▋ | 16565/61904 [8:29:22<16:45:43, 1.33s/it] 27%|██▋ | 16566/61904 [8:29:24<17:29:03, 1.39s/it] 27%|██▋ | 16567/61904 [8:29:25<17:13:07, 1.37s/it] 27%|██▋ | 16568/61904 [8:29:26<17:18:46, 1.37s/it] 27%|██▋ | 16569/61904 [8:29:28<17:26:38, 1.39s/it] 27%|██▋ | 16570/61904 [8:29:29<17:37:07, 1.40s/it] 27%|██▋ | 16571/61904 [8:29:31<17:27:03, 1.39s/it] 27%|██▋ | 16572/61904 [8:29:32<17:21:49, 1.38s/it] 27%|██▋ | 16573/61904 [8:29:33<17:31:24, 1.39s/it] 27%|██▋ | 16574/61904 [8:29:35<17:15:46, 1.37s/it] 27%|██▋ | 16575/61904 [8:29:36<16:57:07, 1.35s/it] 27%|██▋ | 16576/61904 [8:29:37<16:42:58, 1.33s/it] 27%|██▋ | 16577/61904 [8:29:39<16:51:22, 1.34s/it] 27%|██▋ | 16578/61904 [8:29:40<16:48:32, 1.34s/it] 27%|██▋ | 16579/61904 [8:29:41<17:08:07, 1.36s/it] 27%|██▋ | 16580/61904 [8:29:43<17:01:30, 1.35s/it] {'loss': 2.7595, 'learning_rate': 1.734539089848308e-07, 'epoch': 4.28} + 27%|██▋ | 16580/61904 [8:29:43<17:01:30, 1.35s/it] 27%|██▋ | 16581/61904 [8:29:44<17:02:52, 1.35s/it] 27%|██▋ | 16582/61904 [8:29:45<16:56:01, 1.35s/it] 27%|██▋ | 16583/61904 [8:29:47<16:38:23, 1.32s/it] 27%|██▋ | 16584/61904 [8:29:48<16:58:28, 1.35s/it] 27%|██▋ | 16585/61904 [8:29:49<17:05:43, 1.36s/it] 27%|██▋ | 16586/61904 [8:29:51<17:01:42, 1.35s/it] 27%|██▋ | 16587/61904 [8:29:52<17:38:53, 1.40s/it] 27%|██▋ | 16588/61904 [8:29:54<17:52:20, 1.42s/it] 27%|██▋ | 16589/61904 [8:29:55<17:48:08, 1.41s/it] 27%|██▋ | 16590/61904 [8:29:57<17:48:24, 1.41s/it] 27%|██▋ | 16591/61904 [8:29:58<17:23:16, 1.38s/it] 27%|██▋ | 16592/61904 [8:29:59<17:16:46, 1.37s/it] 27%|██▋ | 16593/61904 [8:30:01<16:51:27, 1.34s/it] 27%|██▋ | 16594/61904 [8:30:02<16:37:44, 1.32s/it] 27%|██▋ | 16595/61904 [8:30:03<16:47:53, 1.33s/it] 27%|██▋ | 16596/61904 [8:30:05<17:01:19, 1.35s/it] 27%|██▋ | 16597/61904 [8:30:06<17:15:16, 1.37s/it] 27%|██▋ | 16598/61904 [8:30:07<17:33:12, 1.39s/it] 27%|██▋ | 16599/61904 [8:30:09<17:38:22, 1.40s/it] 27%|██▋ | 16600/61904 [8:30:10<18:05:39, 1.44s/it] {'loss': 2.7325, 'learning_rate': 1.7342149617528847e-07, 'epoch': 4.29} + 27%|██▋ | 16600/61904 [8:30:10<18:05:39, 1.44s/it] 27%|██▋ | 16601/61904 [8:30:12<17:34:23, 1.40s/it] 27%|██▋ | 16602/61904 [8:30:13<17:14:43, 1.37s/it] 27%|██▋ | 16603/61904 [8:30:14<17:23:58, 1.38s/it] 27%|██▋ | 16604/61904 [8:30:16<17:38:09, 1.40s/it] 27%|██▋ | 16605/61904 [8:30:17<17:42:52, 1.41s/it] 27%|██▋ | 16606/61904 [8:30:19<17:16:57, 1.37s/it] 27%|██▋ | 16607/61904 [8:30:20<16:55:14, 1.34s/it] 27%|██▋ | 16608/61904 [8:30:21<17:25:31, 1.38s/it] 27%|██▋ | 16609/61904 [8:30:23<17:23:46, 1.38s/it] 27%|██▋ | 16610/61904 [8:30:24<17:14:02, 1.37s/it] 27%|██▋ | 16611/61904 [8:30:25<17:06:52, 1.36s/it] 27%|██▋ | 16612/61904 [8:30:27<16:54:33, 1.34s/it] 27%|██▋ | 16613/61904 [8:30:28<16:31:06, 1.31s/it] 27%|██▋ | 16614/61904 [8:30:29<16:07:37, 1.28s/it] 27%|██▋ | 16615/61904 [8:30:30<15:52:43, 1.26s/it] 27%|██▋ | 16616/61904 [8:30:32<16:28:57, 1.31s/it] 27%|██▋ | 16617/61904 [8:30:33<16:46:46, 1.33s/it] 27%|██▋ | 16618/61904 [8:30:35<17:22:21, 1.38s/it] 27%|██▋ | 16619/61904 [8:30:36<17:37:45, 1.40s/it] 27%|██▋ | 16620/61904 [8:30:38<17:45:53, 1.41s/it] {'loss': 2.733, 'learning_rate': 1.7338908336574613e-07, 'epoch': 4.3} + 27%|██▋ | 16620/61904 [8:30:38<17:45:53, 1.41s/it] 27%|██▋ | 16621/61904 [8:30:39<17:38:15, 1.40s/it] 27%|██▋ | 16622/61904 [8:30:40<17:36:37, 1.40s/it] 27%|██▋ | 16623/61904 [8:30:42<17:05:53, 1.36s/it] 27%|██▋ | 16624/61904 [8:30:43<16:54:37, 1.34s/it] 27%|██▋ | 16625/61904 [8:30:44<17:00:01, 1.35s/it] 27%|██▋ | 16626/61904 [8:30:46<17:05:45, 1.36s/it] 27%|██▋ | 16627/61904 [8:30:47<17:34:56, 1.40s/it] 27%|██▋ | 16628/61904 [8:30:49<17:37:46, 1.40s/it] 27%|██▋ | 16629/61904 [8:30:50<18:31:33, 1.47s/it] 27%|██▋ | 16630/61904 [8:30:52<18:13:53, 1.45s/it] 27%|██▋ | 16631/61904 [8:30:53<18:00:11, 1.43s/it] 27%|██▋ | 16632/61904 [8:30:54<17:11:05, 1.37s/it] 27%|██▋ | 16633/61904 [8:30:55<17:07:01, 1.36s/it] 27%|██▋ | 16634/61904 [8:30:57<16:59:41, 1.35s/it] 27%|██▋ | 16635/61904 [8:30:58<16:43:22, 1.33s/it] 27%|██▋ | 16636/61904 [8:31:00<17:02:23, 1.36s/it] 27%|██▋ | 16637/61904 [8:31:01<17:35:39, 1.40s/it] 27%|██▋ | 16638/61904 [8:31:02<17:35:21, 1.40s/it] 27%|██▋ | 16639/61904 [8:31:04<17:00:15, 1.35s/it] 27%|██▋ | 16640/61904 [8:31:05<16:52:27, 1.34s/it] {'loss': 2.7718, 'learning_rate': 1.7335667055620382e-07, 'epoch': 4.3} + 27%|██▋ | 16640/61904 [8:31:05<16:52:27, 1.34s/it] 27%|██▋ | 16641/61904 [8:31:06<16:45:32, 1.33s/it] 27%|██▋ | 16642/61904 [8:31:08<17:19:17, 1.38s/it] 27%|██▋ | 16643/61904 [8:31:09<17:12:24, 1.37s/it] 27%|██▋ | 16644/61904 [8:31:11<17:30:29, 1.39s/it] 27%|██▋ | 16645/61904 [8:31:12<17:29:05, 1.39s/it] 27%|██▋ | 16646/61904 [8:31:13<17:31:29, 1.39s/it] 27%|██▋ | 16647/61904 [8:31:15<17:22:31, 1.38s/it] 27%|██▋ | 16648/61904 [8:31:16<17:58:42, 1.43s/it] 27%|██▋ | 16649/61904 [8:31:18<17:46:55, 1.41s/it] 27%|██▋ | 16650/61904 [8:31:19<17:40:01, 1.41s/it] 27%|██▋ | 16651/61904 [8:31:20<17:19:54, 1.38s/it] 27%|██▋ | 16652/61904 [8:31:22<17:13:10, 1.37s/it] 27%|██▋ | 16653/61904 [8:31:23<17:51:10, 1.42s/it] 27%|██▋ | 16654/61904 [8:31:24<17:09:10, 1.36s/it] 27%|██▋ | 16655/61904 [8:31:26<17:10:56, 1.37s/it] 27%|██▋ | 16656/61904 [8:31:27<17:19:46, 1.38s/it] 27%|██▋ | 16657/61904 [8:31:29<16:59:05, 1.35s/it] 27%|██▋ | 16658/61904 [8:31:30<16:57:05, 1.35s/it] 27%|██▋ | 16659/61904 [8:31:31<17:03:31, 1.36s/it] 27%|██▋ | 16660/61904 [8:31:33<17:40:11, 1.41s/it] {'loss': 2.7603, 'learning_rate': 1.7332425774666148e-07, 'epoch': 4.31} + 27%|██▋ | 16660/61904 [8:31:33<17:40:11, 1.41s/it] 27%|██▋ | 16661/61904 [8:31:34<17:05:23, 1.36s/it] 27%|██▋ | 16662/61904 [8:31:35<17:13:27, 1.37s/it] 27%|██▋ | 16663/61904 [8:31:37<17:27:43, 1.39s/it] 27%|██▋ | 16664/61904 [8:31:38<17:31:54, 1.40s/it] 27%|██▋ | 16665/61904 [8:31:40<17:37:13, 1.40s/it] 27%|██▋ | 16666/61904 [8:31:41<17:18:17, 1.38s/it] 27%|██▋ | 16667/61904 [8:31:42<17:15:45, 1.37s/it] 27%|██▋ | 16668/61904 [8:31:44<16:49:32, 1.34s/it] 27%|██▋ | 16669/61904 [8:31:45<16:47:16, 1.34s/it] 27%|██▋ | 16670/61904 [8:31:46<17:06:56, 1.36s/it] 27%|██▋ | 16671/61904 [8:31:48<17:22:59, 1.38s/it] 27%|██▋ | 16672/61904 [8:31:49<17:19:26, 1.38s/it] 27%|██▋ | 16673/61904 [8:31:50<16:58:26, 1.35s/it] 27%|██▋ | 16674/61904 [8:31:52<17:01:58, 1.36s/it] 27%|██▋ | 16675/61904 [8:31:53<17:45:29, 1.41s/it] 27%|██▋ | 16676/61904 [8:31:55<17:18:24, 1.38s/it] 27%|██▋ | 16677/61904 [8:31:56<17:37:37, 1.40s/it] 27%|██▋ | 16678/61904 [8:31:58<17:36:59, 1.40s/it] 27%|██▋ | 16679/61904 [8:31:59<17:14:34, 1.37s/it] 27%|██▋ | 16680/61904 [8:32:00<17:27:58, 1.39s/it] {'loss': 2.7269, 'learning_rate': 1.7329184493711914e-07, 'epoch': 4.31} + 27%|██▋ | 16680/61904 [8:32:00<17:27:58, 1.39s/it] 27%|██▋ | 16681/61904 [8:32:02<17:07:27, 1.36s/it] 27%|██▋ | 16682/61904 [8:32:03<16:25:02, 1.31s/it] 27%|██▋ | 16683/61904 [8:32:04<16:37:25, 1.32s/it] 27%|██▋ | 16684/61904 [8:32:05<16:30:11, 1.31s/it] 27%|██▋ | 16685/61904 [8:32:07<17:25:35, 1.39s/it] 27%|██▋ | 16686/61904 [8:32:08<17:50:49, 1.42s/it] 27%|██▋ | 16687/61904 [8:32:10<17:32:35, 1.40s/it] 27%|██▋ | 16688/61904 [8:32:11<17:27:33, 1.39s/it] 27%|██▋ | 16689/61904 [8:32:12<17:03:17, 1.36s/it] 27%|██▋ | 16690/61904 [8:32:14<16:38:47, 1.33s/it] 27%|██▋ | 16691/61904 [8:32:15<16:54:56, 1.35s/it] 27%|██▋ | 16692/61904 [8:32:17<17:12:57, 1.37s/it] 27%|██▋ | 16693/61904 [8:32:18<16:44:44, 1.33s/it] 27%|██▋ | 16694/61904 [8:32:19<17:08:11, 1.36s/it] 27%|██▋ | 16695/61904 [8:32:21<17:10:43, 1.37s/it] 27%|██▋ | 16696/61904 [8:32:22<16:56:48, 1.35s/it] 27%|██▋ | 16697/61904 [8:32:23<16:46:17, 1.34s/it] 27%|██▋ | 16698/61904 [8:32:24<16:37:05, 1.32s/it] 27%|██▋ | 16699/61904 [8:32:26<16:45:55, 1.34s/it] 27%|██▋ | 16700/61904 [8:32:27<16:26:53, 1.31s/it] {'loss': 2.7934, 'learning_rate': 1.7325943212757683e-07, 'epoch': 4.32} + 27%|██▋ | 16700/61904 [8:32:27<16:26:53, 1.31s/it] 27%|██▋ | 16701/61904 [8:32:28<16:42:17, 1.33s/it] 27%|██▋ | 16702/61904 [8:32:30<17:20:43, 1.38s/it] 27%|██▋ | 16703/61904 [8:32:31<17:11:45, 1.37s/it] 27%|██▋ | 16704/61904 [8:32:33<17:18:09, 1.38s/it] 27%|██▋ | 16705/61904 [8:32:34<17:03:00, 1.36s/it] 27%|██▋ | 16706/61904 [8:32:35<16:32:50, 1.32s/it] 27%|██▋ | 16707/61904 [8:32:37<17:15:36, 1.37s/it] 27%|██▋ | 16708/61904 [8:32:38<17:14:58, 1.37s/it] 27%|██▋ | 16709/61904 [8:32:40<17:41:06, 1.41s/it] 27%|██▋ | 16710/61904 [8:32:41<18:21:33, 1.46s/it] 27%|██▋ | 16711/61904 [8:32:43<18:14:25, 1.45s/it] 27%|██▋ | 16712/61904 [8:32:44<18:17:46, 1.46s/it] 27%|██▋ | 16713/61904 [8:32:46<18:55:49, 1.51s/it] 27%|██▋ | 16714/61904 [8:32:47<18:20:32, 1.46s/it] 27%|██▋ | 16715/61904 [8:32:48<18:05:07, 1.44s/it] 27%|██▋ | 16716/61904 [8:32:50<18:55:31, 1.51s/it] 27%|██▋ | 16717/61904 [8:32:51<18:15:19, 1.45s/it] 27%|██▋ | 16718/61904 [8:32:53<18:17:37, 1.46s/it] 27%|██▋ | 16719/61904 [8:32:54<17:55:10, 1.43s/it] 27%|██▋ | 16720/61904 [8:32:56<17:39:28, 1.41s/it] {'loss': 2.7189, 'learning_rate': 1.7322701931803446e-07, 'epoch': 4.32} + 27%|██▋ | 16720/61904 [8:32:56<17:39:28, 1.41s/it] 27%|██▋ | 16721/61904 [8:32:57<17:10:26, 1.37s/it] 27%|██▋ | 16722/61904 [8:32:58<17:54:23, 1.43s/it] 27%|██▋ | 16723/61904 [8:33:00<17:59:38, 1.43s/it] 27%|██▋ | 16724/61904 [8:33:01<17:49:31, 1.42s/it] 27%|██▋ | 16725/61904 [8:33:03<17:51:48, 1.42s/it] 27%|██▋ | 16726/61904 [8:33:04<17:29:08, 1.39s/it] 27%|██▋ | 16727/61904 [8:33:05<17:21:44, 1.38s/it] 27%|██▋ | 16728/61904 [8:33:07<18:00:10, 1.43s/it] 27%|██▋ | 16729/61904 [8:33:09<18:28:48, 1.47s/it] 27%|██▋ | 16730/61904 [8:33:10<17:50:50, 1.42s/it] 27%|██▋ | 16731/61904 [8:33:11<17:25:58, 1.39s/it] 27%|██▋ | 16732/61904 [8:33:12<16:33:13, 1.32s/it] 27%|██▋ | 16733/61904 [8:33:14<16:14:38, 1.29s/it] 27%|██▋ | 16734/61904 [8:33:15<16:52:28, 1.34s/it] 27%|██▋ | 16735/61904 [8:33:16<16:50:14, 1.34s/it] 27%|██▋ | 16736/61904 [8:33:18<16:53:05, 1.35s/it] 27%|██▋ | 16737/61904 [8:33:19<16:19:14, 1.30s/it] 27%|██▋ | 16738/61904 [8:33:20<16:29:13, 1.31s/it] 27%|██▋ | 16739/61904 [8:33:22<16:27:14, 1.31s/it] 27%|██▋ | 16740/61904 [8:33:23<17:02:13, 1.36s/it] {'loss': 2.6768, 'learning_rate': 1.7319460650849215e-07, 'epoch': 4.33} + 27%|██▋ | 16740/61904 [8:33:23<17:02:13, 1.36s/it] 27%|██▋ | 16741/61904 [8:33:24<16:45:09, 1.34s/it] 27%|██▋ | 16742/61904 [8:33:26<16:53:52, 1.35s/it] 27%|██▋ | 16743/61904 [8:33:27<16:32:36, 1.32s/it] 27%|██▋ | 16744/61904 [8:33:28<16:24:28, 1.31s/it] 27%|██▋ | 16745/61904 [8:33:30<16:24:36, 1.31s/it] 27%|██▋ | 16746/61904 [8:33:31<16:25:09, 1.31s/it] 27%|██▋ | 16747/61904 [8:33:32<16:49:17, 1.34s/it] 27%|██▋ | 16748/61904 [8:33:34<17:24:20, 1.39s/it] 27%|██▋ | 16749/61904 [8:33:35<17:31:20, 1.40s/it] 27%|██▋ | 16750/61904 [8:33:37<17:15:40, 1.38s/it] 27%|██▋ | 16751/61904 [8:33:38<17:19:02, 1.38s/it] 27%|██▋ | 16752/61904 [8:33:39<17:38:22, 1.41s/it] 27%|██▋ | 16753/61904 [8:33:41<17:36:25, 1.40s/it] 27%|██▋ | 16754/61904 [8:33:42<17:09:01, 1.37s/it] 27%|██▋ | 16755/61904 [8:33:43<16:56:32, 1.35s/it] 27%|██▋ | 16756/61904 [8:33:45<16:46:47, 1.34s/it] 27%|██▋ | 16757/61904 [8:33:46<16:58:44, 1.35s/it] 27%|██▋ | 16758/61904 [8:33:47<17:10:22, 1.37s/it] 27%|██��� | 16759/61904 [8:33:49<17:20:12, 1.38s/it] 27%|██▋ | 16760/61904 [8:33:50<17:05:18, 1.36s/it] {'loss': 2.7036, 'learning_rate': 1.7316219369894984e-07, 'epoch': 4.33} + 27%|██▋ | 16760/61904 [8:33:50<17:05:18, 1.36s/it] 27%|██▋ | 16761/61904 [8:33:52<17:05:53, 1.36s/it] 27%|██▋ | 16762/61904 [8:33:53<17:06:13, 1.36s/it] 27%|██▋ | 16763/61904 [8:33:54<17:37:13, 1.41s/it] 27%|██▋ | 16764/61904 [8:33:56<17:21:32, 1.38s/it] 27%|██▋ | 16765/61904 [8:33:57<17:06:16, 1.36s/it] 27%|██▋ | 16766/61904 [8:33:58<17:12:55, 1.37s/it] 27%|██▋ | 16767/61904 [8:34:00<17:02:10, 1.36s/it] 27%|██▋ | 16768/61904 [8:34:01<17:38:15, 1.41s/it] 27%|██▋ | 16769/61904 [8:34:03<17:12:25, 1.37s/it] 27%|██▋ | 16770/61904 [8:34:04<17:05:22, 1.36s/it] 27%|██▋ | 16771/61904 [8:34:05<17:32:39, 1.40s/it] 27%|██▋ | 16772/61904 [8:34:07<17:14:44, 1.38s/it] 27%|██▋ | 16773/61904 [8:34:08<17:19:09, 1.38s/it] 27%|██▋ | 16774/61904 [8:34:09<17:01:30, 1.36s/it] 27%|██▋ | 16775/61904 [8:34:11<16:52:24, 1.35s/it] 27%|██▋ | 16776/61904 [8:34:12<17:36:32, 1.40s/it] 27%|██▋ | 16777/61904 [8:34:14<17:15:07, 1.38s/it] 27%|██▋ | 16778/61904 [8:34:15<17:37:42, 1.41s/it] 27%|██▋ | 16779/61904 [8:34:16<17:19:06, 1.38s/it] 27%|██▋ | 16780/61904 [8:34:18<16:55:43, 1.35s/it] {'loss': 2.7489, 'learning_rate': 1.7312978088940748e-07, 'epoch': 4.34} + 27%|██▋ | 16780/61904 [8:34:18<16:55:43, 1.35s/it] 27%|██▋ | 16781/61904 [8:34:19<16:47:09, 1.34s/it] 27%|██▋ | 16782/61904 [8:34:20<16:37:57, 1.33s/it] 27%|██▋ | 16783/61904 [8:34:22<18:11:29, 1.45s/it] 27%|██▋ | 16784/61904 [8:34:23<18:06:09, 1.44s/it] 27%|██▋ | 16785/61904 [8:34:25<17:24:03, 1.39s/it] 27%|██▋ | 16786/61904 [8:34:26<17:24:07, 1.39s/it] 27%|██▋ | 16787/61904 [8:34:27<17:20:39, 1.38s/it] 27%|██▋ | 16788/61904 [8:34:29<17:24:35, 1.39s/it] 27%|██▋ | 16789/61904 [8:34:30<17:19:09, 1.38s/it] 27%|██▋ | 16790/61904 [8:34:32<17:27:19, 1.39s/it] 27%|██▋ | 16791/61904 [8:34:33<17:38:12, 1.41s/it] 27%|██▋ | 16792/61904 [8:34:34<17:18:17, 1.38s/it] 27%|██▋ | 16793/61904 [8:34:36<16:58:01, 1.35s/it] 27%|██▋ | 16794/61904 [8:34:37<16:56:25, 1.35s/it] 27%|██▋ | 16795/61904 [8:34:38<16:51:35, 1.35s/it] 27%|██▋ | 16796/61904 [8:34:40<16:49:44, 1.34s/it] 27%|██▋ | 16797/61904 [8:34:41<17:01:47, 1.36s/it] 27%|██▋ | 16798/61904 [8:34:42<16:55:29, 1.35s/it] 27%|██▋ | 16799/61904 [8:34:44<16:46:09, 1.34s/it] 27%|██▋ | 16800/61904 [8:34:45<16:43:26, 1.33s/it] {'loss': 2.7534, 'learning_rate': 1.7309736807986516e-07, 'epoch': 4.34} + 27%|██▋ | 16800/61904 [8:34:45<16:43:26, 1.33s/it] 27%|██▋ | 16801/61904 [8:34:46<16:43:54, 1.34s/it] 27%|██▋ | 16802/61904 [8:34:48<16:18:37, 1.30s/it] 27%|██▋ | 16803/61904 [8:34:49<16:48:51, 1.34s/it] 27%|██▋ | 16804/61904 [8:34:50<16:50:51, 1.34s/it] 27%|██▋ | 16805/61904 [8:34:52<16:51:55, 1.35s/it] 27%|██▋ | 16806/61904 [8:34:53<16:15:23, 1.30s/it] 27%|██▋ | 16807/61904 [8:34:54<16:44:11, 1.34s/it] 27%|██▋ | 16808/61904 [8:34:56<16:02:47, 1.28s/it] 27%|██▋ | 16809/61904 [8:34:57<16:07:42, 1.29s/it] 27%|██▋ | 16810/61904 [8:34:58<16:20:50, 1.31s/it] 27%|██▋ | 16811/61904 [8:35:00<17:17:10, 1.38s/it] 27%|██▋ | 16812/61904 [8:35:01<17:19:03, 1.38s/it] 27%|██▋ | 16813/61904 [8:35:02<17:06:36, 1.37s/it] 27%|██▋ | 16814/61904 [8:35:04<16:47:27, 1.34s/it] 27%|██▋ | 16815/61904 [8:35:05<16:42:49, 1.33s/it] 27%|██▋ | 16816/61904 [8:35:06<16:38:34, 1.33s/it] 27%|██▋ | 16817/61904 [8:35:08<17:00:27, 1.36s/it] 27%|██▋ | 16818/61904 [8:35:09<16:44:23, 1.34s/it] 27%|██▋ | 16819/61904 [8:35:11<16:58:28, 1.36s/it] 27%|██▋ | 16820/61904 [8:35:12<16:46:59, 1.34s/it] {'loss': 2.7199, 'learning_rate': 1.7306495527032283e-07, 'epoch': 4.35} + 27%|██▋ | 16820/61904 [8:35:12<16:46:59, 1.34s/it] 27%|██▋ | 16821/61904 [8:35:13<16:50:14, 1.34s/it] 27%|██▋ | 16822/61904 [8:35:15<16:48:08, 1.34s/it] 27%|██▋ | 16823/61904 [8:35:16<17:32:28, 1.40s/it] 27%|██▋ | 16824/61904 [8:35:17<17:22:50, 1.39s/it] 27%|██▋ | 16825/61904 [8:35:19<17:14:12, 1.38s/it] 27%|██▋ | 16826/61904 [8:35:20<17:09:30, 1.37s/it] 27%|██▋ | 16827/61904 [8:35:21<16:59:12, 1.36s/it] 27%|██▋ | 16828/61904 [8:35:23<16:33:04, 1.32s/it] 27%|██▋ | 16829/61904 [8:35:24<16:55:55, 1.35s/it] 27%|██▋ | 16830/61904 [8:35:26<17:28:48, 1.40s/it] 27%|██▋ | 16831/61904 [8:35:27<17:48:00, 1.42s/it] 27%|██▋ | 16832/61904 [8:35:29<17:49:54, 1.42s/it] 27%|██▋ | 16833/61904 [8:35:30<17:53:18, 1.43s/it] 27%|██▋ | 16834/61904 [8:35:31<17:46:21, 1.42s/it] 27%|██▋ | 16835/61904 [8:35:33<17:09:21, 1.37s/it] 27%|██▋ | 16836/61904 [8:35:34<17:04:57, 1.36s/it] 27%|██▋ | 16837/61904 [8:35:36<18:12:12, 1.45s/it] 27%|██▋ | 16838/61904 [8:35:37<17:29:23, 1.40s/it] 27%|██▋ | 16839/61904 [8:35:38<17:11:04, 1.37s/it] 27%|██▋ | 16840/61904 [8:35:40<17:24:23, 1.39s/it] {'loss': 2.7422, 'learning_rate': 1.730325424607805e-07, 'epoch': 4.35} + 27%|██▋ | 16840/61904 [8:35:40<17:24:23, 1.39s/it] 27%|██▋ | 16841/61904 [8:35:41<17:40:26, 1.41s/it] 27%|██▋ | 16842/61904 [8:35:42<17:14:55, 1.38s/it] 27%|██▋ | 16843/61904 [8:35:44<17:10:04, 1.37s/it] 27%|██▋ | 16844/61904 [8:35:45<16:59:55, 1.36s/it] 27%|██▋ | 16845/61904 [8:35:47<17:30:40, 1.40s/it] 27%|██▋ | 16846/61904 [8:35:48<17:33:25, 1.40s/it] 27%|██▋ | 16847/61904 [8:35:49<16:40:13, 1.33s/it] 27%|██▋ | 16848/61904 [8:35:50<16:25:59, 1.31s/it] 27%|██▋ | 16849/61904 [8:35:52<16:38:58, 1.33s/it] 27%|██▋ | 16850/61904 [8:35:53<16:59:03, 1.36s/it] 27%|██▋ | 16851/61904 [8:35:55<16:51:01, 1.35s/it] 27%|██▋ | 16852/61904 [8:35:56<17:09:29, 1.37s/it] 27%|██▋ | 16853/61904 [8:35:57<16:54:56, 1.35s/it] 27%|██▋ | 16854/61904 [8:35:59<16:57:51, 1.36s/it] 27%|██▋ | 16855/61904 [8:36:00<16:57:47, 1.36s/it] 27%|██▋ | 16856/61904 [8:36:01<16:45:04, 1.34s/it] 27%|██▋ | 16857/61904 [8:36:03<16:51:48, 1.35s/it] 27%|██▋ | 16858/61904 [8:36:04<16:47:45, 1.34s/it] 27%|██▋ | 16859/61904 [8:36:05<16:34:19, 1.32s/it] 27%|██▋ | 16860/61904 [8:36:07<17:03:58, 1.36s/it] {'loss': 2.7423, 'learning_rate': 1.7300012965123818e-07, 'epoch': 4.36} + 27%|██▋ | 16860/61904 [8:36:07<17:03:58, 1.36s/it] 27%|██▋ | 16861/61904 [8:36:08<17:06:52, 1.37s/it] 27%|██▋ | 16862/61904 [8:36:10<17:36:28, 1.41s/it] 27%|██▋ | 16863/61904 [8:36:11<17:28:11, 1.40s/it] 27%|██▋ | 16864/61904 [8:36:12<17:18:55, 1.38s/it] 27%|██▋ | 16865/61904 [8:36:14<16:35:39, 1.33s/it] 27%|██▋ | 16866/61904 [8:36:15<16:18:38, 1.30s/it] 27%|██▋ | 16867/61904 [8:36:16<16:46:30, 1.34s/it] 27%|██▋ | 16868/61904 [8:36:18<16:51:07, 1.35s/it] 27%|██▋ | 16869/61904 [8:36:19<16:55:37, 1.35s/it] 27%|██▋ | 16870/61904 [8:36:20<16:30:29, 1.32s/it] 27%|██▋ | 16871/61904 [8:36:22<16:42:48, 1.34s/it] 27%|██▋ | 16872/61904 [8:36:23<16:42:08, 1.34s/it] 27%|██▋ | 16873/61904 [8:36:24<16:59:04, 1.36s/it] 27%|██▋ | 16874/61904 [8:36:26<17:05:45, 1.37s/it] 27%|██▋ | 16875/61904 [8:36:27<17:25:27, 1.39s/it] 27%|██▋ | 16876/61904 [8:36:28<17:09:02, 1.37s/it] 27%|██▋ | 16877/61904 [8:36:30<17:11:16, 1.37s/it] 27%|██▋ | 16878/61904 [8:36:31<17:15:50, 1.38s/it] 27%|██▋ | 16879/61904 [8:36:33<16:59:15, 1.36s/it] 27%|██▋ | 16880/61904 [8:36:34<16:52:38, 1.35s/it] {'loss': 2.6748, 'learning_rate': 1.7296771684169584e-07, 'epoch': 4.36} + 27%|██▋ | 16880/61904 [8:36:34<16:52:38, 1.35s/it] 27%|██▋ | 16881/61904 [8:36:35<16:54:46, 1.35s/it] 27%|██▋ | 16882/61904 [8:36:37<17:10:35, 1.37s/it] 27%|██▋ | 16883/61904 [8:36:38<17:06:43, 1.37s/it] 27%|██▋ | 16884/61904 [8:36:39<17:00:29, 1.36s/it] 27%|██▋ | 16885/61904 [8:36:41<17:26:53, 1.40s/it] 27%|██▋ | 16886/61904 [8:36:42<16:59:44, 1.36s/it] 27%|██▋ | 16887/61904 [8:36:44<17:27:33, 1.40s/it] 27%|██▋ | 16888/61904 [8:36:45<17:22:25, 1.39s/it] 27%|██▋ | 16889/61904 [8:36:46<17:11:06, 1.37s/it] 27%|██▋ | 16890/61904 [8:36:48<16:34:41, 1.33s/it] 27%|██▋ | 16891/61904 [8:36:49<16:32:21, 1.32s/it] 27%|██▋ | 16892/61904 [8:36:50<16:17:14, 1.30s/it] 27%|██▋ | 16893/61904 [8:36:51<16:06:23, 1.29s/it] 27%|██▋ | 16894/61904 [8:36:53<16:37:56, 1.33s/it] 27%|██▋ | 16895/61904 [8:36:54<16:53:33, 1.35s/it] 27%|██▋ | 16896/61904 [8:36:56<17:02:35, 1.36s/it] 27%|██▋ | 16897/61904 [8:36:57<16:58:41, 1.36s/it] 27%|██▋ | 16898/61904 [8:36:59<18:37:04, 1.49s/it] 27%|██▋ | 16899/61904 [8:37:00<19:04:47, 1.53s/it] 27%|██▋ | 16900/61904 [8:37:02<18:50:37, 1.51s/it] {'loss': 2.6263, 'learning_rate': 1.729353040321535e-07, 'epoch': 4.37} + 27%|██▋ | 16900/61904 [8:37:02<18:50:37, 1.51s/it] 27%|██▋ | 16901/61904 [8:37:03<18:12:36, 1.46s/it] 27%|██▋ | 16902/61904 [8:37:04<17:51:54, 1.43s/it] 27%|██▋ | 16903/61904 [8:37:06<17:40:01, 1.41s/it] 27%|██▋ | 16904/61904 [8:37:07<17:05:33, 1.37s/it] 27%|██▋ | 16905/61904 [8:37:08<16:29:42, 1.32s/it] 27%|██▋ | 16906/61904 [8:37:10<16:46:59, 1.34s/it] 27%|██▋ | 16907/61904 [8:37:11<16:51:14, 1.35s/it] 27%|██▋ | 16908/61904 [8:37:12<16:19:25, 1.31s/it] 27%|██▋ | 16909/61904 [8:37:14<16:28:35, 1.32s/it] 27%|██▋ | 16910/61904 [8:37:15<16:23:32, 1.31s/it] 27%|██▋ | 16911/61904 [8:37:16<16:39:31, 1.33s/it] 27%|██▋ | 16912/61904 [8:37:18<16:49:40, 1.35s/it] 27%|██▋ | 16913/61904 [8:37:19<16:43:15, 1.34s/it] 27%|██▋ | 16914/61904 [8:37:20<16:51:40, 1.35s/it] 27%|██▋ | 16915/61904 [8:37:22<17:25:10, 1.39s/it] 27%|██▋ | 16916/61904 [8:37:23<17:06:48, 1.37s/it] 27%|██▋ | 16917/61904 [8:37:25<17:08:33, 1.37s/it] 27%|██▋ | 16918/61904 [8:37:26<17:17:12, 1.38s/it] 27%|██▋ | 16919/61904 [8:37:27<17:10:26, 1.37s/it] 27%|██▋ | 16920/61904 [8:37:29<16:45:42, 1.34s/it] {'loss': 2.75, 'learning_rate': 1.729028912226112e-07, 'epoch': 4.37} + 27%|██▋ | 16920/61904 [8:37:29<16:45:42, 1.34s/it] 27%|██▋ | 16921/61904 [8:37:30<16:48:39, 1.35s/it] 27%|██▋ | 16922/61904 [8:37:31<17:00:35, 1.36s/it] 27%|██▋ | 16923/61904 [8:37:33<16:26:22, 1.32s/it] 27%|██▋ | 16924/61904 [8:37:34<16:29:32, 1.32s/it] 27%|██▋ | 16925/61904 [8:37:35<16:22:25, 1.31s/it] 27%|██▋ | 16926/61904 [8:37:36<16:24:34, 1.31s/it] 27%|██▋ | 16927/61904 [8:37:38<16:24:04, 1.31s/it] 27%|██▋ | 16928/61904 [8:37:39<16:10:30, 1.29s/it] 27%|██▋ | 16929/61904 [8:37:40<16:07:13, 1.29s/it] 27%|██▋ | 16930/61904 [8:37:42<16:08:24, 1.29s/it] 27%|██▋ | 16931/61904 [8:37:43<15:38:04, 1.25s/it] 27%|██▋ | 16932/61904 [8:37:44<16:05:58, 1.29s/it] 27%|██▋ | 16933/61904 [8:37:46<16:21:27, 1.31s/it] 27%|██▋ | 16934/61904 [8:37:47<16:16:10, 1.30s/it] 27%|██▋ | 16935/61904 [8:37:48<16:21:50, 1.31s/it] 27%|██▋ | 16936/61904 [8:37:50<16:46:22, 1.34s/it] 27%|██▋ | 16937/61904 [8:37:51<17:15:04, 1.38s/it] 27%|██▋ | 16938/61904 [8:37:52<17:07:53, 1.37s/it] 27%|██▋ | 16939/61904 [8:37:54<16:44:26, 1.34s/it] 27%|██▋ | 16940/61904 [8:37:55<16:43:02, 1.34s/it] {'loss': 2.7347, 'learning_rate': 1.7287047841306882e-07, 'epoch': 4.38} + 27%|██▋ | 16940/61904 [8:37:55<16:43:02, 1.34s/it] 27%|██▋ | 16941/61904 [8:37:56<16:40:44, 1.34s/it] 27%|██▋ | 16942/61904 [8:37:58<17:02:44, 1.36s/it] 27%|██▋ | 16943/61904 [8:37:59<17:17:49, 1.38s/it] 27%|██▋ | 16944/61904 [8:38:01<17:12:01, 1.38s/it] 27%|██▋ | 16945/61904 [8:38:02<16:37:40, 1.33s/it] 27%|██▋ | 16946/61904 [8:38:03<16:36:44, 1.33s/it] 27%|██▋ | 16947/61904 [8:38:04<16:47:19, 1.34s/it] 27%|██▋ | 16948/61904 [8:38:06<17:23:46, 1.39s/it] 27%|██▋ | 16949/61904 [8:38:07<17:45:16, 1.42s/it] 27%|██▋ | 16950/61904 [8:38:09<17:33:06, 1.41s/it] 27%|██▋ | 16951/61904 [8:38:10<17:44:15, 1.42s/it] 27%|██▋ | 16952/61904 [8:38:12<17:27:37, 1.40s/it] 27%|██▋ | 16953/61904 [8:38:13<17:56:28, 1.44s/it] 27%|██▋ | 16954/61904 [8:38:15<17:54:27, 1.43s/it] 27%|██▋ | 16955/61904 [8:38:16<17:27:50, 1.40s/it] 27%|██▋ | 16956/61904 [8:38:17<17:30:02, 1.40s/it] 27%|██▋ | 16957/61904 [8:38:19<17:25:08, 1.40s/it] 27%|██▋ | 16958/61904 [8:38:20<17:47:05, 1.42s/it] 27%|██▋ | 16959/61904 [8:38:22<17:47:29, 1.43s/it] 27%|██▋ | 16960/61904 [8:38:23<17:20:34, 1.39s/it] {'loss': 2.7242, 'learning_rate': 1.728380656035265e-07, 'epoch': 4.38} + 27%|██▋ | 16960/61904 [8:38:23<17:20:34, 1.39s/it] 27%|██▋ | 16961/61904 [8:38:24<17:25:09, 1.40s/it] 27%|██▋ | 16962/61904 [8:38:26<16:57:32, 1.36s/it] 27%|██▋ | 16963/61904 [8:38:27<16:41:45, 1.34s/it] 27%|██▋ | 16964/61904 [8:38:28<17:13:17, 1.38s/it] 27%|██▋ | 16965/61904 [8:38:30<17:59:28, 1.44s/it] 27%|██▋ | 16966/61904 [8:38:31<17:47:55, 1.43s/it] 27%|██▋ | 16967/61904 [8:38:33<17:19:12, 1.39s/it] 27%|██▋ | 16968/61904 [8:38:34<16:57:03, 1.36s/it] 27%|██▋ | 16969/61904 [8:38:35<16:27:22, 1.32s/it] 27%|██▋ | 16970/61904 [8:38:36<16:16:45, 1.30s/it] 27%|██▋ | 16971/61904 [8:38:38<16:39:29, 1.33s/it] 27%|██▋ | 16972/61904 [8:38:39<16:50:52, 1.35s/it] 27%|██▋ | 16973/61904 [8:38:41<17:04:04, 1.37s/it] 27%|██▋ | 16974/61904 [8:38:42<16:55:58, 1.36s/it] 27%|██▋ | 16975/61904 [8:38:43<17:10:08, 1.38s/it] 27%|██▋ | 16976/61904 [8:38:45<17:28:08, 1.40s/it] 27%|██▋ | 16977/61904 [8:38:46<17:30:53, 1.40s/it] 27%|██▋ | 16978/61904 [8:38:48<17:22:50, 1.39s/it] 27%|██▋ | 16979/61904 [8:38:49<18:22:50, 1.47s/it] 27%|██▋ | 16980/61904 [8:38:51<17:57:56, 1.44s/it] {'loss': 2.7301, 'learning_rate': 1.7280565279398417e-07, 'epoch': 4.39} + 27%|██▋ | 16980/61904 [8:38:51<17:57:56, 1.44s/it] 27%|██▋ | 16981/61904 [8:38:52<18:02:38, 1.45s/it] 27%|██▋ | 16982/61904 [8:38:54<18:53:44, 1.51s/it] 27%|██▋ | 16983/61904 [8:38:55<18:13:08, 1.46s/it] 27%|██▋ | 16984/61904 [8:38:57<18:08:14, 1.45s/it] 27%|██▋ | 16985/61904 [8:38:58<17:56:02, 1.44s/it] 27%|██▋ | 16986/61904 [8:38:59<17:25:49, 1.40s/it] 27%|██▋ | 16987/61904 [8:39:01<17:31:52, 1.41s/it] 27%|██▋ | 16988/61904 [8:39:02<17:30:32, 1.40s/it] 27%|██▋ | 16989/61904 [8:39:03<17:15:26, 1.38s/it] 27%|██▋ | 16990/61904 [8:39:05<17:24:44, 1.40s/it] 27%|██▋ | 16991/61904 [8:39:06<17:45:26, 1.42s/it] 27%|██▋ | 16992/61904 [8:39:08<17:33:20, 1.41s/it] 27%|██▋ | 16993/61904 [8:39:09<17:26:44, 1.40s/it] 27%|██▋ | 16994/61904 [8:39:10<17:12:04, 1.38s/it] 27%|██▋ | 16995/61904 [8:39:12<17:03:29, 1.37s/it] 27%|██▋ | 16996/61904 [8:39:13<17:07:28, 1.37s/it] 27%|██▋ | 16997/61904 [8:39:14<17:00:05, 1.36s/it] 27%|██▋ | 16998/61904 [8:39:16<17:02:47, 1.37s/it] 27%|██▋ | 16999/61904 [8:39:17<17:24:02, 1.40s/it] 27%|██▋ | 17000/61904 [8:39:19<17:27:55, 1.40s/it] {'loss': 2.6954, 'learning_rate': 1.7277323998444184e-07, 'epoch': 4.39} + 27%|██▋ | 17000/61904 [8:39:19<17:27:55, 1.40s/it] 27%|██▋ | 17001/61904 [8:39:20<16:52:28, 1.35s/it] 27%|██▋ | 17002/61904 [8:39:21<16:50:47, 1.35s/it] 27%|██▋ | 17003/61904 [8:39:23<16:47:31, 1.35s/it] 27%|██▋ | 17004/61904 [8:39:24<16:52:52, 1.35s/it] 27%|██▋ | 17005/61904 [8:39:25<16:29:19, 1.32s/it] 27%|██▋ | 17006/61904 [8:39:27<16:38:47, 1.33s/it] 27%|██▋ | 17007/61904 [8:39:28<16:24:58, 1.32s/it] 27%|██▋ | 17008/61904 [8:39:29<16:12:50, 1.30s/it] 27%|██▋ | 17009/61904 [8:39:31<16:38:06, 1.33s/it] 27%|██▋ | 17010/61904 [8:39:32<17:08:48, 1.37s/it] 27%|██▋ | 17011/61904 [8:39:34<17:36:09, 1.41s/it] 27%|██▋ | 17012/61904 [8:39:35<17:13:37, 1.38s/it] 27%|██▋ | 17013/61904 [8:39:36<16:58:05, 1.36s/it] 27%|██▋ | 17014/61904 [8:39:38<17:11:03, 1.38s/it] 27%|██▋ | 17015/61904 [8:39:39<17:06:20, 1.37s/it] 27%|██▋ | 17016/61904 [8:39:40<17:00:06, 1.36s/it] 27%|██▋ | 17017/61904 [8:39:42<16:54:12, 1.36s/it] 27%|██▋ | 17018/61904 [8:39:43<16:50:19, 1.35s/it] 27%|██▋ | 17019/61904 [8:39:44<16:36:51, 1.33s/it] 27%|██▋ | 17020/61904 [8:39:46<16:45:16, 1.34s/it] {'loss': 2.6983, 'learning_rate': 1.7274082717489952e-07, 'epoch': 4.4} + 27%|██▋ | 17020/61904 [8:39:46<16:45:16, 1.34s/it] 27%|██▋ | 17021/61904 [8:39:47<16:31:04, 1.32s/it] 27%|██▋ | 17022/61904 [8:39:48<17:24:44, 1.40s/it] 27%|██▋ | 17023/61904 [8:39:50<17:03:26, 1.37s/it] 28%|██▊ | 17024/61904 [8:39:51<17:04:20, 1.37s/it] 28%|██▊ | 17025/61904 [8:39:52<16:51:40, 1.35s/it] 28%|██▊ | 17026/61904 [8:39:54<17:39:50, 1.42s/it] 28%|██▊ | 17027/61904 [8:39:55<17:34:24, 1.41s/it] 28%|██▊ | 17028/61904 [8:39:57<17:10:40, 1.38s/it] 28%|██▊ | 17029/61904 [8:39:58<17:18:55, 1.39s/it] 28%|██▊ | 17030/61904 [8:40:00<17:24:44, 1.40s/it] 28%|██▊ | 17031/61904 [8:40:01<16:59:06, 1.36s/it] 28%|██▊ | 17032/61904 [8:40:02<16:47:44, 1.35s/it] 28%|██▊ | 17033/61904 [8:40:04<17:01:09, 1.37s/it] 28%|██▊ | 17034/61904 [8:40:05<17:03:05, 1.37s/it] 28%|██▊ | 17035/61904 [8:40:06<16:57:45, 1.36s/it] 28%|██▊ | 17036/61904 [8:40:08<16:52:42, 1.35s/it] 28%|██▊ | 17037/61904 [8:40:09<17:37:48, 1.41s/it] 28%|██▊ | 17038/61904 [8:40:11<17:59:01, 1.44s/it] 28%|██▊ | 17039/61904 [8:40:12<17:22:33, 1.39s/it] 28%|██▊ | 17040/61904 [8:40:13<17:42:21, 1.42s/it] {'loss': 2.7694, 'learning_rate': 1.7270841436535719e-07, 'epoch': 4.4} + 28%|██▊ | 17040/61904 [8:40:13<17:42:21, 1.42s/it] 28%|██▊ | 17041/61904 [8:40:15<17:39:20, 1.42s/it] 28%|██▊ | 17042/61904 [8:40:16<17:38:46, 1.42s/it] 28%|██▊ | 17043/61904 [8:40:18<17:21:52, 1.39s/it] 28%|██▊ | 17044/61904 [8:40:19<17:27:30, 1.40s/it] 28%|██▊ | 17045/61904 [8:40:20<17:02:40, 1.37s/it] 28%|██▊ | 17046/61904 [8:40:22<17:23:20, 1.40s/it] 28%|██▊ | 17047/61904 [8:40:23<17:43:43, 1.42s/it] 28%|██▊ | 17048/61904 [8:40:25<17:14:11, 1.38s/it] 28%|██▊ | 17049/61904 [8:40:26<17:24:14, 1.40s/it] 28%|██▊ | 17050/61904 [8:40:27<17:03:15, 1.37s/it] 28%|██▊ | 17051/61904 [8:40:29<16:57:38, 1.36s/it] 28%|██▊ | 17052/61904 [8:40:30<17:22:11, 1.39s/it] 28%|██▊ | 17053/61904 [8:40:31<16:38:46, 1.34s/it] 28%|██▊ | 17054/61904 [8:40:33<16:41:57, 1.34s/it] 28%|██▊ | 17055/61904 [8:40:34<16:48:30, 1.35s/it] 28%|██▊ | 17056/61904 [8:40:35<16:40:55, 1.34s/it] 28%|██▊ | 17057/61904 [8:40:37<16:50:15, 1.35s/it] 28%|██▊ | 17058/61904 [8:40:38<17:02:34, 1.37s/it] 28%|██▊ | 17059/61904 [8:40:39<16:43:29, 1.34s/it] 28%|██▊ | 17060/61904 [8:40:41<16:15:46, 1.31s/it] {'loss': 2.6728, 'learning_rate': 1.7267600155581485e-07, 'epoch': 4.41} + 28%|██▊ | 17060/61904 [8:40:41<16:15:46, 1.31s/it] 28%|██▊ | 17061/61904 [8:40:42<16:33:06, 1.33s/it] 28%|██▊ | 17062/61904 [8:40:43<17:01:21, 1.37s/it] 28%|██▊ | 17063/61904 [8:40:45<17:07:36, 1.38s/it] 28%|██▊ | 17064/61904 [8:40:46<16:50:21, 1.35s/it] 28%|██▊ | 17065/61904 [8:40:48<17:04:40, 1.37s/it] 28%|██▊ | 17066/61904 [8:40:49<17:34:24, 1.41s/it] 28%|██▊ | 17067/61904 [8:40:50<17:29:21, 1.40s/it] 28%|██▊ | 17068/61904 [8:40:52<16:53:51, 1.36s/it] 28%|██▊ | 17069/61904 [8:40:53<16:49:05, 1.35s/it] 28%|██▊ | 17070/61904 [8:40:54<16:44:13, 1.34s/it] 28%|██▊ | 17071/61904 [8:40:56<16:38:44, 1.34s/it] 28%|██▊ | 17072/61904 [8:40:57<16:35:19, 1.33s/it] 28%|██▊ | 17073/61904 [8:40:58<16:52:44, 1.36s/it] 28%|██▊ | 17074/61904 [8:41:00<16:36:30, 1.33s/it] 28%|██▊ | 17075/61904 [8:41:01<17:11:59, 1.38s/it] 28%|██▊ | 17076/61904 [8:41:02<16:46:49, 1.35s/it] 28%|██▊ | 17077/61904 [8:41:04<17:06:48, 1.37s/it] 28%|██▊ | 17078/61904 [8:41:05<17:03:49, 1.37s/it] 28%|██▊ | 17079/61904 [8:41:07<16:55:20, 1.36s/it] 28%|██▊ | 17080/61904 [8:41:08<17:11:08, 1.38s/it] {'loss': 2.7151, 'learning_rate': 1.7264358874627254e-07, 'epoch': 4.41} + 28%|██▊ | 17080/61904 [8:41:08<17:11:08, 1.38s/it] 28%|██▊ | 17081/61904 [8:41:09<17:15:38, 1.39s/it] 28%|██▊ | 17082/61904 [8:41:11<17:25:52, 1.40s/it] 28%|██▊ | 17083/61904 [8:41:12<17:07:07, 1.37s/it] 28%|██▊ | 17084/61904 [8:41:14<17:28:38, 1.40s/it] 28%|██▊ | 17085/61904 [8:41:15<18:09:38, 1.46s/it] 28%|██▊ | 17086/61904 [8:41:17<17:52:51, 1.44s/it] 28%|██▊ | 17087/61904 [8:41:18<17:19:19, 1.39s/it] 28%|██▊ | 17088/61904 [8:41:19<17:33:46, 1.41s/it] 28%|██▊ | 17089/61904 [8:41:21<17:27:34, 1.40s/it] 28%|██▊ | 17090/61904 [8:41:22<17:19:03, 1.39s/it] 28%|██▊ | 17091/61904 [8:41:24<17:32:42, 1.41s/it] 28%|██▊ | 17092/61904 [8:41:25<17:18:46, 1.39s/it] 28%|██▊ | 17093/61904 [8:41:26<17:25:57, 1.40s/it] 28%|██▊ | 17094/61904 [8:41:28<17:40:02, 1.42s/it] 28%|██▊ | 17095/61904 [8:41:29<17:54:42, 1.44s/it] 28%|██▊ | 17096/61904 [8:41:31<18:00:37, 1.45s/it] 28%|██▊ | 17097/61904 [8:41:32<17:37:49, 1.42s/it] 28%|██▊ | 17098/61904 [8:41:34<17:52:07, 1.44s/it] 28%|██▊ | 17099/61904 [8:41:35<17:23:16, 1.40s/it] 28%|██▊ | 17100/61904 [8:41:36<18:02:50, 1.45s/it] {'loss': 2.6792, 'learning_rate': 1.7261117593673017e-07, 'epoch': 4.42} + 28%|██▊ | 17100/61904 [8:41:36<18:02:50, 1.45s/it] 28%|██▊ | 17101/61904 [8:41:38<17:46:36, 1.43s/it] 28%|██▊ | 17102/61904 [8:41:39<17:47:55, 1.43s/it] 28%|██▊ | 17103/61904 [8:41:41<18:06:26, 1.46s/it] 28%|██▊ | 17104/61904 [8:41:42<17:58:37, 1.44s/it] 28%|██▊ | 17105/61904 [8:41:44<17:37:54, 1.42s/it] 28%|██▊ | 17106/61904 [8:41:45<16:59:29, 1.37s/it] 28%|██▊ | 17107/61904 [8:41:46<17:09:01, 1.38s/it] 28%|██▊ | 17108/61904 [8:41:47<16:41:14, 1.34s/it] 28%|██▊ | 17109/61904 [8:41:49<16:21:05, 1.31s/it] 28%|██▊ | 17110/61904 [8:41:50<16:39:56, 1.34s/it] 28%|██▊ | 17111/61904 [8:41:51<16:55:16, 1.36s/it] 28%|██▊ | 17112/61904 [8:41:53<16:34:04, 1.33s/it] 28%|██▊ | 17113/61904 [8:41:54<17:23:10, 1.40s/it] 28%|██▊ | 17114/61904 [8:41:56<17:06:31, 1.38s/it] 28%|██▊ | 17115/61904 [8:41:57<16:52:22, 1.36s/it] 28%|██▊ | 17116/61904 [8:41:58<16:57:52, 1.36s/it] 28%|██▊ | 17117/61904 [8:42:00<16:51:42, 1.36s/it] 28%|██▊ | 17118/61904 [8:42:01<17:24:07, 1.40s/it] 28%|██▊ | 17119/61904 [8:42:02<16:56:15, 1.36s/it] 28%|██▊ | 17120/61904 [8:42:04<16:48:59, 1.35s/it] {'loss': 2.7092, 'learning_rate': 1.7257876312718786e-07, 'epoch': 4.42} + 28%|██▊ | 17120/61904 [8:42:04<16:48:59, 1.35s/it] 28%|██▊ | 17121/61904 [8:42:05<17:08:23, 1.38s/it] 28%|██▊ | 17122/61904 [8:42:07<16:56:03, 1.36s/it] 28%|██▊ | 17123/61904 [8:42:08<16:45:04, 1.35s/it] 28%|██▊ | 17124/61904 [8:42:09<17:08:48, 1.38s/it] 28%|██▊ | 17125/61904 [8:42:11<16:54:54, 1.36s/it] 28%|██▊ | 17126/61904 [8:42:12<16:24:41, 1.32s/it] 28%|██▊ | 17127/61904 [8:42:13<16:14:40, 1.31s/it] 28%|██▊ | 17128/61904 [8:42:14<16:16:15, 1.31s/it] 28%|██▊ | 17129/61904 [8:42:16<16:16:38, 1.31s/it] 28%|██▊ | 17130/61904 [8:42:17<16:53:59, 1.36s/it] 28%|██▊ | 17131/61904 [8:42:19<16:49:44, 1.35s/it] 28%|██▊ | 17132/61904 [8:42:20<17:13:22, 1.38s/it] 28%|██▊ | 17133/61904 [8:42:21<16:58:22, 1.36s/it] 28%|██▊ | 17134/61904 [8:42:23<16:34:07, 1.33s/it] 28%|██▊ | 17135/61904 [8:42:24<16:32:37, 1.33s/it] 28%|██▊ | 17136/61904 [8:42:25<16:46:33, 1.35s/it] 28%|██▊ | 17137/61904 [8:42:27<16:45:23, 1.35s/it] 28%|██▊ | 17138/61904 [8:42:28<16:15:25, 1.31s/it] 28%|██▊ | 17139/61904 [8:42:29<16:12:33, 1.30s/it] 28%|██▊ | 17140/61904 [8:42:31<16:46:03, 1.35s/it] {'loss': 2.7595, 'learning_rate': 1.7254635031764552e-07, 'epoch': 4.43} + 28%|██▊ | 17140/61904 [8:42:31<16:46:03, 1.35s/it] 28%|██▊ | 17141/61904 [8:42:32<16:41:52, 1.34s/it] 28%|██▊ | 17142/61904 [8:42:33<16:41:23, 1.34s/it] 28%|██▊ | 17143/61904 [8:42:35<17:01:10, 1.37s/it] 28%|██▊ | 17144/61904 [8:42:36<17:03:26, 1.37s/it] 28%|██▊ | 17145/61904 [8:42:38<17:34:56, 1.41s/it] 28%|██▊ | 17146/61904 [8:42:39<16:59:11, 1.37s/it] 28%|██▊ | 17147/61904 [8:42:40<17:02:09, 1.37s/it] 28%|██▊ | 17148/61904 [8:42:42<17:14:56, 1.39s/it] 28%|██▊ | 17149/61904 [8:42:43<16:50:09, 1.35s/it] 28%|██▊ | 17150/61904 [8:42:44<17:07:34, 1.38s/it] 28%|██▊ | 17151/61904 [8:42:46<17:18:27, 1.39s/it] 28%|██▊ | 17152/61904 [8:42:47<17:05:34, 1.38s/it] 28%|██▊ | 17153/61904 [8:42:48<16:58:28, 1.37s/it] 28%|██▊ | 17154/61904 [8:42:50<17:32:19, 1.41s/it] 28%|██▊ | 17155/61904 [8:42:52<17:59:45, 1.45s/it] 28%|██▊ | 17156/61904 [8:42:53<18:14:17, 1.47s/it] 28%|██▊ | 17157/61904 [8:42:55<18:20:57, 1.48s/it] 28%|██▊ | 17158/61904 [8:42:56<18:05:35, 1.46s/it] 28%|██▊ | 17159/61904 [8:42:57<17:31:45, 1.41s/it] 28%|██▊ | 17160/61904 [8:42:59<17:20:45, 1.40s/it] {'loss': 2.7303, 'learning_rate': 1.7251393750810318e-07, 'epoch': 4.43} + 28%|██▊ | 17160/61904 [8:42:59<17:20:45, 1.40s/it] 28%|██▊ | 17161/61904 [8:43:00<17:28:30, 1.41s/it] 28%|██▊ | 17162/61904 [8:43:01<17:06:53, 1.38s/it] 28%|██▊ | 17163/61904 [8:43:03<16:50:53, 1.36s/it] 28%|██▊ | 17164/61904 [8:43:04<17:08:24, 1.38s/it] 28%|██▊ | 17165/61904 [8:43:05<16:55:13, 1.36s/it] 28%|██▊ | 17166/61904 [8:43:07<17:11:35, 1.38s/it] 28%|██▊ | 17167/61904 [8:43:08<16:58:34, 1.37s/it] 28%|██▊ | 17168/61904 [8:43:10<17:06:37, 1.38s/it] 28%|██▊ | 17169/61904 [8:43:11<16:41:01, 1.34s/it] 28%|██▊ | 17170/61904 [8:43:12<16:04:41, 1.29s/it] 28%|██▊ | 17171/61904 [8:43:13<16:44:05, 1.35s/it] 28%|██▊ | 17172/61904 [8:43:15<17:00:51, 1.37s/it] 28%|██▊ | 17173/61904 [8:43:16<16:46:35, 1.35s/it] 28%|██▊ | 17174/61904 [8:43:17<16:20:10, 1.31s/it] 28%|██▊ | 17175/61904 [8:43:19<16:55:40, 1.36s/it] 28%|██▊ | 17176/61904 [8:43:20<17:10:36, 1.38s/it] 28%|██▊ | 17177/61904 [8:43:21<16:17:19, 1.31s/it] 28%|██▊ | 17178/61904 [8:43:23<16:25:49, 1.32s/it] 28%|██▊ | 17179/61904 [8:43:24<16:13:54, 1.31s/it] 28%|██▊ | 17180/61904 [8:43:26<16:38:04, 1.34s/it] {'loss': 2.6834, 'learning_rate': 1.7248152469856087e-07, 'epoch': 4.44} + 28%|██▊ | 17180/61904 [8:43:26<16:38:04, 1.34s/it] 28%|██▊ | 17181/61904 [8:43:27<16:21:20, 1.32s/it] 28%|██▊ | 17182/61904 [8:43:28<16:00:46, 1.29s/it] 28%|██▊ | 17183/61904 [8:43:29<16:04:49, 1.29s/it] 28%|██▊ | 17184/61904 [8:43:31<16:06:37, 1.30s/it] 28%|██▊ | 17185/61904 [8:43:32<16:25:55, 1.32s/it] 28%|██▊ | 17186/61904 [8:43:33<16:42:18, 1.34s/it] 28%|██▊ | 17187/61904 [8:43:35<17:15:17, 1.39s/it] 28%|██▊ | 17188/61904 [8:43:36<17:01:00, 1.37s/it] 28%|██▊ | 17189/61904 [8:43:38<17:33:34, 1.41s/it] 28%|██▊ | 17190/61904 [8:43:39<17:31:28, 1.41s/it] 28%|██▊ | 17191/61904 [8:43:40<17:10:14, 1.38s/it] 28%|██▊ | 17192/61904 [8:43:42<16:54:30, 1.36s/it] 28%|██▊ | 17193/61904 [8:43:43<16:47:23, 1.35s/it] 28%|██▊ | 17194/61904 [8:43:45<17:10:00, 1.38s/it] 28%|██▊ | 17195/61904 [8:43:46<16:51:14, 1.36s/it] 28%|██▊ | 17196/61904 [8:43:47<16:34:04, 1.33s/it] 28%|██▊ | 17197/61904 [8:43:49<16:55:10, 1.36s/it] 28%|██▊ | 17198/61904 [8:43:50<16:34:08, 1.33s/it] 28%|██▊ | 17199/61904 [8:43:51<16:44:18, 1.35s/it] 28%|██▊ | 17200/61904 [8:43:52<16:17:50, 1.31s/it] {'loss': 2.728, 'learning_rate': 1.7244911188901853e-07, 'epoch': 4.45} + 28%|██▊ | 17200/61904 [8:43:52<16:17:50, 1.31s/it] 28%|██▊ | 17201/61904 [8:43:54<16:14:07, 1.31s/it] 28%|██▊ | 17202/61904 [8:43:55<16:44:05, 1.35s/it] 28%|██▊ | 17203/61904 [8:43:57<16:45:23, 1.35s/it] 28%|██▊ | 17204/61904 [8:43:58<16:46:18, 1.35s/it] 28%|██▊ | 17205/61904 [8:43:59<16:52:17, 1.36s/it] 28%|██▊ | 17206/61904 [8:44:01<16:51:33, 1.36s/it] 28%|██▊ | 17207/61904 [8:44:02<16:58:29, 1.37s/it] 28%|██▊ | 17208/61904 [8:44:04<17:40:12, 1.42s/it] 28%|██▊ | 17209/61904 [8:44:05<17:04:23, 1.38s/it] 28%|██▊ | 17210/61904 [8:44:06<16:53:59, 1.36s/it] 28%|██▊ | 17211/61904 [8:44:07<16:14:59, 1.31s/it] 28%|██▊ | 17212/61904 [8:44:09<17:06:43, 1.38s/it] 28%|██▊ | 17213/61904 [8:44:11<18:16:49, 1.47s/it] 28%|██▊ | 17214/61904 [8:44:12<17:46:44, 1.43s/it] 28%|██▊ | 17215/61904 [8:44:13<17:28:01, 1.41s/it] 28%|██▊ | 17216/61904 [8:44:15<17:38:04, 1.42s/it] 28%|██▊ | 17217/61904 [8:44:16<17:25:31, 1.40s/it] 28%|██▊ | 17218/61904 [8:44:17<17:15:27, 1.39s/it] 28%|██▊ | 17219/61904 [8:44:19<16:35:45, 1.34s/it] 28%|██▊ | 17220/61904 [8:44:20<16:50:33, 1.36s/it] {'loss': 2.7141, 'learning_rate': 1.724166990794762e-07, 'epoch': 4.45} + 28%|██▊ | 17220/61904 [8:44:20<16:50:33, 1.36s/it] 28%|██▊ | 17221/61904 [8:44:21<16:39:25, 1.34s/it] 28%|██▊ | 17222/61904 [8:44:23<16:45:00, 1.35s/it] 28%|██▊ | 17223/61904 [8:44:24<16:32:40, 1.33s/it] 28%|██▊ | 17224/61904 [8:44:25<16:16:18, 1.31s/it] 28%|██▊ | 17225/61904 [8:44:27<17:15:07, 1.39s/it] 28%|██▊ | 17226/61904 [8:44:28<17:10:22, 1.38s/it] 28%|██▊ | 17227/61904 [8:44:30<17:33:03, 1.41s/it] 28%|██▊ | 17228/61904 [8:44:31<16:59:51, 1.37s/it] 28%|██▊ | 17229/61904 [8:44:32<16:34:28, 1.34s/it] 28%|██▊ | 17230/61904 [8:44:34<16:28:35, 1.33s/it] 28%|██▊ | 17231/61904 [8:44:35<16:59:56, 1.37s/it] 28%|██▊ | 17232/61904 [8:44:37<17:40:32, 1.42s/it] 28%|██▊ | 17233/61904 [8:44:38<17:29:52, 1.41s/it] 28%|██▊ | 17234/61904 [8:44:39<17:09:29, 1.38s/it] 28%|██▊ | 17235/61904 [8:44:41<17:26:44, 1.41s/it] 28%|██▊ | 17236/61904 [8:44:42<17:03:54, 1.38s/it] 28%|██▊ | 17237/61904 [8:44:43<17:22:33, 1.40s/it] 28%|██▊ | 17238/61904 [8:44:45<17:23:50, 1.40s/it] 28%|██▊ | 17239/61904 [8:44:46<17:15:41, 1.39s/it] 28%|██▊ | 17240/61904 [8:44:48<17:40:02, 1.42s/it] {'loss': 2.6973, 'learning_rate': 1.7238428626993388e-07, 'epoch': 4.46} + 28%|██▊ | 17240/61904 [8:44:48<17:40:02, 1.42s/it] 28%|██▊ | 17241/61904 [8:44:49<17:39:18, 1.42s/it] 28%|██▊ | 17242/61904 [8:44:51<17:26:02, 1.41s/it] 28%|██▊ | 17243/61904 [8:44:52<17:38:13, 1.42s/it] 28%|██▊ | 17244/61904 [8:44:53<17:45:04, 1.43s/it] 28%|██▊ | 17245/61904 [8:44:55<17:33:28, 1.42s/it] 28%|██▊ | 17246/61904 [8:44:56<17:05:25, 1.38s/it] 28%|██▊ | 17247/61904 [8:44:58<17:24:50, 1.40s/it] 28%|██▊ | 17248/61904 [8:44:59<17:31:24, 1.41s/it] 28%|██▊ | 17249/61904 [8:45:00<17:06:02, 1.38s/it] 28%|██▊ | 17250/61904 [8:45:02<17:33:08, 1.42s/it] 28%|██▊ | 17251/61904 [8:45:03<17:29:10, 1.41s/it] 28%|██▊ | 17252/61904 [8:45:05<17:17:52, 1.39s/it] 28%|██▊ | 17253/61904 [8:45:06<17:27:19, 1.41s/it] 28%|██▊ | 17254/61904 [8:45:07<17:33:16, 1.42s/it] 28%|██▊ | 17255/61904 [8:45:09<16:58:14, 1.37s/it] 28%|██▊ | 17256/61904 [8:45:10<17:17:53, 1.39s/it] 28%|██▊ | 17257/61904 [8:45:11<17:02:21, 1.37s/it] 28%|██▊ | 17258/61904 [8:45:13<16:58:10, 1.37s/it] 28%|██▊ | 17259/61904 [8:45:14<16:45:05, 1.35s/it] 28%|██▊ | 17260/61904 [8:45:16<17:09:14, 1.38s/it] {'loss': 2.7242, 'learning_rate': 1.7235187346039152e-07, 'epoch': 4.46} + 28%|██▊ | 17260/61904 [8:45:16<17:09:14, 1.38s/it] 28%|██▊ | 17261/61904 [8:45:17<17:22:43, 1.40s/it] 28%|██▊ | 17262/61904 [8:45:19<18:36:31, 1.50s/it] 28%|██▊ | 17263/61904 [8:45:20<18:24:31, 1.48s/it] 28%|██▊ | 17264/61904 [8:45:22<17:53:22, 1.44s/it] 28%|██▊ | 17265/61904 [8:45:23<17:15:44, 1.39s/it] 28%|██▊ | 17266/61904 [8:45:24<16:52:33, 1.36s/it] 28%|██▊ | 17267/61904 [8:45:26<17:00:31, 1.37s/it] 28%|██▊ | 17268/61904 [8:45:27<17:02:55, 1.38s/it] 28%|██▊ | 17269/61904 [8:45:28<17:19:51, 1.40s/it] 28%|██▊ | 17270/61904 [8:45:30<17:28:26, 1.41s/it] 28%|██▊ | 17271/61904 [8:45:31<17:28:20, 1.41s/it] 28%|██▊ | 17272/61904 [8:45:33<17:08:15, 1.38s/it] 28%|██▊ | 17273/61904 [8:45:34<16:58:28, 1.37s/it] 28%|██▊ | 17274/61904 [8:45:35<17:54:50, 1.45s/it] 28%|██▊ | 17275/61904 [8:45:37<17:32:26, 1.41s/it] 28%|██▊ | 17276/61904 [8:45:38<17:08:23, 1.38s/it] 28%|██▊ | 17277/61904 [8:45:39<16:49:43, 1.36s/it] 28%|██▊ | 17278/61904 [8:45:41<16:43:13, 1.35s/it] 28%|██▊ | 17279/61904 [8:45:42<16:50:55, 1.36s/it] 28%|██▊ | 17280/61904 [8:45:44<17:04:58, 1.38s/it] {'loss': 2.6033, 'learning_rate': 1.723194606508492e-07, 'epoch': 4.47} + 28%|██▊ | 17280/61904 [8:45:44<17:04:58, 1.38s/it] 28%|██▊ | 17281/61904 [8:45:45<17:04:43, 1.38s/it] 28%|██▊ | 17282/61904 [8:45:46<16:56:06, 1.37s/it] 28%|██▊ | 17283/61904 [8:45:47<16:08:40, 1.30s/it] 28%|██▊ | 17284/61904 [8:45:49<16:23:17, 1.32s/it] 28%|██▊ | 17285/61904 [8:45:50<17:07:56, 1.38s/it] 28%|██▊ | 17286/61904 [8:45:52<17:25:07, 1.41s/it] 28%|██▊ | 17287/61904 [8:45:53<17:00:24, 1.37s/it] 28%|██▊ | 17288/61904 [8:45:54<17:02:53, 1.38s/it] 28%|██▊ | 17289/61904 [8:45:56<16:31:39, 1.33s/it] 28%|██▊ | 17290/61904 [8:45:57<16:22:53, 1.32s/it] 28%|██▊ | 17291/61904 [8:45:58<16:24:12, 1.32s/it] 28%|██▊ | 17292/61904 [8:46:00<16:32:15, 1.33s/it] 28%|██▊ | 17293/61904 [8:46:01<15:58:14, 1.29s/it] 28%|██▊ | 17294/61904 [8:46:02<16:28:09, 1.33s/it] 28%|██▊ | 17295/61904 [8:46:04<16:43:54, 1.35s/it] 28%|██▊ | 17296/61904 [8:46:05<16:45:41, 1.35s/it] 28%|██▊ | 17297/61904 [8:46:06<16:07:15, 1.30s/it] 28%|██▊ | 17298/61904 [8:46:08<16:12:20, 1.31s/it] 28%|██▊ | 17299/61904 [8:46:09<16:47:13, 1.35s/it] 28%|██▊ | 17300/61904 [8:46:10<16:27:57, 1.33s/it] {'loss': 2.6755, 'learning_rate': 1.7228704784130687e-07, 'epoch': 4.47} + 28%|██▊ | 17300/61904 [8:46:10<16:27:57, 1.33s/it] 28%|██▊ | 17301/61904 [8:46:12<16:30:45, 1.33s/it] 28%|██▊ | 17302/61904 [8:46:13<16:42:40, 1.35s/it] 28%|██▊ | 17303/61904 [8:46:15<17:22:41, 1.40s/it] 28%|██▊ | 17304/61904 [8:46:16<17:40:07, 1.43s/it] 28%|██▊ | 17305/61904 [8:46:17<17:30:05, 1.41s/it] 28%|██▊ | 17306/61904 [8:46:19<17:53:57, 1.44s/it] 28%|██▊ | 17307/61904 [8:46:20<18:08:04, 1.46s/it] 28%|██▊ | 17308/61904 [8:46:22<17:53:41, 1.44s/it] 28%|██▊ | 17309/61904 [8:46:23<17:14:58, 1.39s/it] 28%|██▊ | 17310/61904 [8:46:24<17:04:53, 1.38s/it] 28%|██▊ | 17311/61904 [8:46:26<17:36:03, 1.42s/it] 28%|██▊ | 17312/61904 [8:46:27<17:27:17, 1.41s/it] 28%|██▊ | 17313/61904 [8:46:29<17:04:10, 1.38s/it] 28%|██▊ | 17314/61904 [8:46:30<16:51:54, 1.36s/it] 28%|██▊ | 17315/61904 [8:46:31<16:35:14, 1.34s/it] 28%|██▊ | 17316/61904 [8:46:33<16:28:36, 1.33s/it] 28%|██▊ | 17317/61904 [8:46:34<15:59:11, 1.29s/it] 28%|██▊ | 17318/61904 [8:46:35<16:27:18, 1.33s/it] 28%|██▊ | 17319/61904 [8:46:37<16:53:16, 1.36s/it] 28%|██▊ | 17320/61904 [8:46:38<17:12:41, 1.39s/it] {'loss': 2.6354, 'learning_rate': 1.7225463503176453e-07, 'epoch': 4.48} + 28%|██▊ | 17320/61904 [8:46:38<17:12:41, 1.39s/it] 28%|██▊ | 17321/61904 [8:46:39<16:46:00, 1.35s/it] 28%|██▊ | 17322/61904 [8:46:41<16:57:18, 1.37s/it] 28%|██▊ | 17323/61904 [8:46:42<16:50:39, 1.36s/it] 28%|██▊ | 17324/61904 [8:46:43<16:33:00, 1.34s/it] 28%|██▊ | 17325/61904 [8:46:45<16:49:29, 1.36s/it] 28%|██▊ | 17326/61904 [8:46:46<16:34:27, 1.34s/it] 28%|██▊ | 17327/61904 [8:46:48<17:03:50, 1.38s/it] 28%|██▊ | 17328/61904 [8:46:49<16:47:41, 1.36s/it] 28%|██▊ | 17329/61904 [8:46:50<16:59:16, 1.37s/it] 28%|██▊ | 17330/61904 [8:46:51<16:23:32, 1.32s/it] 28%|██▊ | 17331/61904 [8:46:53<16:39:43, 1.35s/it] 28%|██▊ | 17332/61904 [8:46:54<17:19:21, 1.40s/it] 28%|██▊ | 17333/61904 [8:46:56<17:09:55, 1.39s/it] 28%|██▊ | 17334/61904 [8:46:57<17:36:38, 1.42s/it] 28%|██▊ | 17335/61904 [8:46:59<17:04:13, 1.38s/it] 28%|██▊ | 17336/61904 [8:47:00<16:36:18, 1.34s/it] 28%|██▊ | 17337/61904 [8:47:01<16:13:34, 1.31s/it] 28%|██▊ | 17338/61904 [8:47:02<16:29:38, 1.33s/it] 28%|██▊ | 17339/61904 [8:47:04<16:37:49, 1.34s/it] 28%|██▊ | 17340/61904 [8:47:05<16:35:41, 1.34s/it] {'loss': 2.6705, 'learning_rate': 1.7222222222222222e-07, 'epoch': 4.48} + 28%|██▊ | 17340/61904 [8:47:05<16:35:41, 1.34s/it] 28%|██▊ | 17341/61904 [8:47:07<16:43:57, 1.35s/it] 28%|██▊ | 17342/61904 [8:47:08<16:48:37, 1.36s/it] 28%|██▊ | 17343/61904 [8:47:09<16:20:13, 1.32s/it] 28%|██▊ | 17344/61904 [8:47:10<15:57:16, 1.29s/it] 28%|██▊ | 17345/61904 [8:47:12<16:37:25, 1.34s/it] 28%|██▊ | 17346/61904 [8:47:13<16:39:43, 1.35s/it] 28%|██▊ | 17347/61904 [8:47:15<16:55:19, 1.37s/it] 28%|██▊ | 17348/61904 [8:47:16<17:24:37, 1.41s/it] 28%|██▊ | 17349/61904 [8:47:17<17:25:09, 1.41s/it] 28%|██▊ | 17350/61904 [8:47:19<17:28:27, 1.41s/it] 28%|██▊ | 17351/61904 [8:47:20<17:31:57, 1.42s/it] 28%|██▊ | 17352/61904 [8:47:22<17:38:39, 1.43s/it] 28%|██▊ | 17353/61904 [8:47:23<17:26:46, 1.41s/it] 28%|██▊ | 17354/61904 [8:47:25<17:35:03, 1.42s/it] 28%|██▊ | 17355/61904 [8:47:26<17:33:13, 1.42s/it] 28%|██▊ | 17356/61904 [8:47:27<17:09:06, 1.39s/it] 28%|██▊ | 17357/61904 [8:47:29<17:17:02, 1.40s/it] 28%|██▊ | 17358/61904 [8:47:30<17:20:51, 1.40s/it] 28%|██▊ | 17359/61904 [8:47:32<17:18:45, 1.40s/it] 28%|██▊ | 17360/61904 [8:47:33<17:14:30, 1.39s/it] {'loss': 2.7144, 'learning_rate': 1.7218980941267988e-07, 'epoch': 4.49} + 28%|██▊ | 17360/61904 [8:47:33<17:14:30, 1.39s/it] 28%|██▊ | 17361/61904 [8:47:34<17:30:27, 1.41s/it] 28%|██▊ | 17362/61904 [8:47:36<17:45:39, 1.44s/it] 28%|██▊ | 17363/61904 [8:47:37<17:30:50, 1.42s/it] 28%|██▊ | 17364/61904 [8:47:39<17:25:09, 1.41s/it] 28%|██▊ | 17365/61904 [8:47:40<17:15:38, 1.40s/it] 28%|██▊ | 17366/61904 [8:47:41<16:45:28, 1.35s/it] 28%|██▊ | 17367/61904 [8:47:43<16:41:52, 1.35s/it] 28%|██▊ | 17368/61904 [8:47:44<16:48:50, 1.36s/it] 28%|██▊ | 17369/61904 [8:47:45<16:36:27, 1.34s/it] 28%|██▊ | 17370/61904 [8:47:47<16:33:26, 1.34s/it] 28%|██▊ | 17371/61904 [8:47:48<16:52:04, 1.36s/it] 28%|██▊ | 17372/61904 [8:47:49<16:59:43, 1.37s/it] 28%|██▊ | 17373/61904 [8:47:51<16:58:29, 1.37s/it] 28%|██▊ | 17374/61904 [8:47:52<16:46:35, 1.36s/it] 28%|██▊ | 17375/61904 [8:47:54<17:04:54, 1.38s/it] 28%|██▊ | 17376/61904 [8:47:55<17:05:43, 1.38s/it] 28%|██▊ | 17377/61904 [8:47:56<17:13:48, 1.39s/it] 28%|██▊ | 17378/61904 [8:47:58<17:20:15, 1.40s/it] 28%|██▊ | 17379/61904 [8:47:59<17:30:18, 1.42s/it] 28%|██▊ | 17380/61904 [8:48:01<17:28:08, 1.41s/it] {'loss': 2.7033, 'learning_rate': 1.7215739660313754e-07, 'epoch': 4.49} + 28%|██▊ | 17380/61904 [8:48:01<17:28:08, 1.41s/it] 28%|██▊ | 17381/61904 [8:48:02<17:28:46, 1.41s/it] 28%|██▊ | 17382/61904 [8:48:03<17:10:08, 1.39s/it] 28%|██▊ | 17383/61904 [8:48:05<16:59:48, 1.37s/it] 28%|██▊ | 17384/61904 [8:48:06<16:44:22, 1.35s/it] 28%|██▊ | 17385/61904 [8:48:07<16:48:51, 1.36s/it] 28%|██▊ | 17386/61904 [8:48:09<16:27:23, 1.33s/it] 28%|██▊ | 17387/61904 [8:48:10<16:58:01, 1.37s/it] 28%|██▊ | 17388/61904 [8:48:11<16:56:46, 1.37s/it] 28%|██▊ | 17389/61904 [8:48:13<17:04:55, 1.38s/it] 28%|██▊ | 17390/61904 [8:48:14<16:48:18, 1.36s/it] 28%|██▊ | 17391/61904 [8:48:15<16:32:33, 1.34s/it] 28%|██▊ | 17392/61904 [8:48:17<16:31:48, 1.34s/it] 28%|██▊ | 17393/61904 [8:48:18<16:44:10, 1.35s/it] 28%|██▊ | 17394/61904 [8:48:20<16:46:50, 1.36s/it] 28%|██▊ | 17395/61904 [8:48:21<16:30:50, 1.34s/it] 28%|██▊ | 17396/61904 [8:48:22<17:02:40, 1.38s/it] 28%|██▊ | 17397/61904 [8:48:24<16:41:03, 1.35s/it] 28%|██▊ | 17398/61904 [8:48:25<16:37:51, 1.35s/it] 28%|██▊ | 17399/61904 [8:48:26<16:54:25, 1.37s/it] 28%|██▊ | 17400/61904 [8:48:28<16:47:20, 1.36s/it] {'loss': 2.6926, 'learning_rate': 1.7212498379359523e-07, 'epoch': 4.5} + 28%|██▊ | 17400/61904 [8:48:28<16:47:20, 1.36s/it] 28%|██▊ | 17401/61904 [8:48:29<16:31:23, 1.34s/it] 28%|██▊ | 17402/61904 [8:48:31<17:30:36, 1.42s/it] 28%|██▊ | 17403/61904 [8:48:32<17:22:55, 1.41s/it] 28%|██▊ | 17404/61904 [8:48:33<17:20:27, 1.40s/it] 28%|██▊ | 17405/61904 [8:48:35<17:36:17, 1.42s/it] 28%|██▊ | 17406/61904 [8:48:36<17:16:34, 1.40s/it] 28%|██▊ | 17407/61904 [8:48:38<17:08:46, 1.39s/it] 28%|██▊ | 17408/61904 [8:48:39<17:02:58, 1.38s/it] 28%|██▊ | 17409/61904 [8:48:40<17:10:08, 1.39s/it] 28%|██▊ | 17410/61904 [8:48:42<17:20:32, 1.40s/it] 28%|██▊ | 17411/61904 [8:48:43<17:33:16, 1.42s/it] 28%|██▊ | 17412/61904 [8:48:45<18:10:42, 1.47s/it] 28%|██▊ | 17413/61904 [8:48:46<17:38:13, 1.43s/it] 28%|██▊ | 17414/61904 [8:48:48<17:38:43, 1.43s/it] 28%|██▊ | 17415/61904 [8:48:49<17:45:51, 1.44s/it] 28%|██▊ | 17416/61904 [8:48:51<18:01:37, 1.46s/it] 28%|██▊ | 17417/61904 [8:48:52<17:48:28, 1.44s/it] 28%|██▊ | 17418/61904 [8:48:53<17:20:18, 1.40s/it] 28%|██▊ | 17419/61904 [8:48:55<17:08:53, 1.39s/it] 28%|██▊ | 17420/61904 [8:48:56<17:31:56, 1.42s/it] {'loss': 2.6997, 'learning_rate': 1.7209257098405287e-07, 'epoch': 4.5} + 28%|██▊ | 17420/61904 [8:48:56<17:31:56, 1.42s/it] 28%|██▊ | 17421/61904 [8:48:57<17:16:09, 1.40s/it] 28%|██▊ | 17422/61904 [8:48:59<17:03:38, 1.38s/it] 28%|██▊ | 17423/61904 [8:49:00<17:36:38, 1.43s/it] 28%|██▊ | 17424/61904 [8:49:02<17:17:03, 1.40s/it] 28%|██▊ | 17425/61904 [8:49:03<17:28:11, 1.41s/it] 28%|██▊ | 17426/61904 [8:49:04<16:53:57, 1.37s/it] 28%|██▊ | 17427/61904 [8:49:06<17:04:05, 1.38s/it] 28%|██▊ | 17428/61904 [8:49:08<18:42:54, 1.51s/it] 28%|██▊ | 17429/61904 [8:49:09<17:47:46, 1.44s/it] 28%|██▊ | 17430/61904 [8:49:10<17:10:00, 1.39s/it] 28%|██▊ | 17431/61904 [8:49:11<16:25:37, 1.33s/it] 28%|██▊ | 17432/61904 [8:49:13<16:18:23, 1.32s/it] 28%|██▊ | 17433/61904 [8:49:14<16:17:01, 1.32s/it] 28%|██▊ | 17434/61904 [8:49:15<16:31:02, 1.34s/it] 28%|██▊ | 17435/61904 [8:49:17<16:44:04, 1.35s/it] 28%|██▊ | 17436/61904 [8:49:18<17:15:52, 1.40s/it] 28%|██▊ | 17437/61904 [8:49:20<17:11:36, 1.39s/it] 28%|██▊ | 17438/61904 [8:49:21<16:47:52, 1.36s/it] 28%|██▊ | 17439/61904 [8:49:22<16:59:58, 1.38s/it] 28%|██▊ | 17440/61904 [8:49:24<16:53:49, 1.37s/it] {'loss': 2.6363, 'learning_rate': 1.7206015817451056e-07, 'epoch': 4.51} + 28%|██▊ | 17440/61904 [8:49:24<16:53:49, 1.37s/it] 28%|██▊ | 17441/61904 [8:49:25<17:06:39, 1.39s/it] 28%|██▊ | 17442/61904 [8:49:26<16:53:44, 1.37s/it] 28%|██▊ | 17443/61904 [8:49:28<16:41:17, 1.35s/it] 28%|██▊ | 17444/61904 [8:49:29<16:40:21, 1.35s/it] 28%|██▊ | 17445/61904 [8:49:30<16:50:38, 1.36s/it] 28%|██▊ | 17446/61904 [8:49:32<17:13:08, 1.39s/it] 28%|██▊ | 17447/61904 [8:49:33<17:01:03, 1.38s/it] 28%|██▊ | 17448/61904 [8:49:35<16:31:09, 1.34s/it] 28%|██▊ | 17449/61904 [8:49:36<16:47:11, 1.36s/it] 28%|██▊ | 17450/61904 [8:49:37<16:43:42, 1.35s/it] 28%|██▊ | 17451/61904 [8:49:39<16:57:17, 1.37s/it] 28%|██▊ | 17452/61904 [8:49:40<17:19:01, 1.40s/it] 28%|██▊ | 17453/61904 [8:49:42<17:18:04, 1.40s/it] 28%|██▊ | 17454/61904 [8:49:43<17:16:31, 1.40s/it] 28%|██▊ | 17455/61904 [8:49:44<17:21:04, 1.41s/it] 28%|██▊ | 17456/61904 [8:49:46<17:19:34, 1.40s/it] 28%|██▊ | 17457/61904 [8:49:47<16:55:26, 1.37s/it] 28%|██▊ | 17458/61904 [8:49:48<16:42:16, 1.35s/it] 28%|██▊ | 17459/61904 [8:49:50<16:57:44, 1.37s/it] 28%|██▊ | 17460/61904 [8:49:51<16:09:19, 1.31s/it] {'loss': 2.7146, 'learning_rate': 1.7202774536496822e-07, 'epoch': 4.51} + 28%|██▊ | 17460/61904 [8:49:51<16:09:19, 1.31s/it] 28%|██▊ | 17461/61904 [8:49:52<16:07:51, 1.31s/it] 28%|██▊ | 17462/61904 [8:49:54<17:01:33, 1.38s/it] 28%|██▊ | 17463/61904 [8:49:55<17:33:35, 1.42s/it] 28%|██▊ | 17464/61904 [8:49:57<17:28:50, 1.42s/it] 28%|██▊ | 17465/61904 [8:49:58<17:28:40, 1.42s/it] 28%|██▊ | 17466/61904 [8:49:59<16:43:50, 1.36s/it] 28%|██▊ | 17467/61904 [8:50:01<16:25:33, 1.33s/it] 28%|██▊ | 17468/61904 [8:50:02<17:32:27, 1.42s/it] 28%|██▊ | 17469/61904 [8:50:04<17:00:02, 1.38s/it] 28%|██▊ | 17470/61904 [8:50:05<16:59:07, 1.38s/it] 28%|██▊ | 17471/61904 [8:50:06<16:34:30, 1.34s/it] 28%|██▊ | 17472/61904 [8:50:08<16:51:00, 1.37s/it] 28%|██▊ | 17473/61904 [8:50:09<16:29:07, 1.34s/it] 28%|██▊ | 17474/61904 [8:50:10<16:32:14, 1.34s/it] 28%|██▊ | 17475/61904 [8:50:12<16:27:58, 1.33s/it] 28%|██▊ | 17476/61904 [8:50:13<17:37:07, 1.43s/it] 28%|██▊ | 17477/61904 [8:50:15<17:36:22, 1.43s/it] 28%|██▊ | 17478/61904 [8:50:16<18:32:32, 1.50s/it] 28%|██▊ | 17479/61904 [8:50:18<17:50:52, 1.45s/it] 28%|██▊ | 17480/61904 [8:50:19<17:20:07, 1.40s/it] {'loss': 2.6798, 'learning_rate': 1.7199533255542588e-07, 'epoch': 4.52} + 28%|██▊ | 17480/61904 [8:50:19<17:20:07, 1.40s/it] 28%|██▊ | 17481/61904 [8:50:20<17:20:08, 1.40s/it] 28%|██▊ | 17482/61904 [8:50:22<17:20:36, 1.41s/it] 28%|██▊ | 17483/61904 [8:50:23<17:25:03, 1.41s/it] 28%|██▊ | 17484/61904 [8:50:24<17:11:26, 1.39s/it] 28%|██▊ | 17485/61904 [8:50:26<16:44:44, 1.36s/it] 28%|██▊ | 17486/61904 [8:50:27<16:41:21, 1.35s/it] 28%|██▊ | 17487/61904 [8:50:29<17:04:33, 1.38s/it] 28%|██▊ | 17488/61904 [8:50:30<18:22:09, 1.49s/it] 28%|██▊ | 17489/61904 [8:50:32<17:53:30, 1.45s/it] 28%|██▊ | 17490/61904 [8:50:33<17:52:47, 1.45s/it] 28%|██▊ | 17491/61904 [8:50:34<17:01:32, 1.38s/it] 28%|██▊ | 17492/61904 [8:50:36<17:01:18, 1.38s/it] 28%|██▊ | 17493/61904 [8:50:37<16:41:43, 1.35s/it] 28%|██▊ | 17494/61904 [8:50:38<16:42:58, 1.36s/it] 28%|██▊ | 17495/61904 [8:50:40<16:51:09, 1.37s/it] 28%|██▊ | 17496/61904 [8:50:41<17:03:17, 1.38s/it] 28%|██▊ | 17497/61904 [8:50:42<16:49:27, 1.36s/it] 28%|██▊ | 17498/61904 [8:50:44<16:36:58, 1.35s/it] 28%|██▊ | 17499/61904 [8:50:45<17:13:48, 1.40s/it] 28%|██▊ | 17500/61904 [8:50:47<16:35:28, 1.35s/it] {'loss': 2.6965, 'learning_rate': 1.7196291974588357e-07, 'epoch': 4.52} + 28%|██▊ | 17500/61904 [8:50:47<16:35:28, 1.35s/it] 28%|██▊ | 17501/61904 [8:50:48<16:15:16, 1.32s/it] 28%|██▊ | 17502/61904 [8:50:49<16:40:32, 1.35s/it] 28%|██▊ | 17503/61904 [8:50:51<16:44:29, 1.36s/it] 28%|██▊ | 17504/61904 [8:50:52<16:33:05, 1.34s/it] 28%|██▊ | 17505/61904 [8:50:53<16:35:19, 1.35s/it] 28%|██▊ | 17506/61904 [8:50:55<16:34:51, 1.34s/it] 28%|██▊ | 17507/61904 [8:50:56<16:36:32, 1.35s/it] 28%|██▊ | 17508/61904 [8:50:57<16:25:27, 1.33s/it] 28%|██▊ | 17509/61904 [8:50:59<16:33:07, 1.34s/it] 28%|██▊ | 17510/61904 [8:51:00<16:37:45, 1.35s/it] 28%|██▊ | 17511/61904 [8:51:01<16:10:46, 1.31s/it] 28%|██▊ | 17512/61904 [8:51:03<16:24:40, 1.33s/it] 28%|██▊ | 17513/61904 [8:51:04<16:16:56, 1.32s/it] 28%|██▊ | 17514/61904 [8:51:05<16:37:43, 1.35s/it] 28%|██▊ | 17515/61904 [8:51:07<16:37:26, 1.35s/it] 28%|██▊ | 17516/61904 [8:51:08<16:24:05, 1.33s/it] 28%|██▊ | 17517/61904 [8:51:09<16:05:27, 1.31s/it] 28%|██▊ | 17518/61904 [8:51:10<16:07:55, 1.31s/it] 28%|██▊ | 17519/61904 [8:51:12<16:04:09, 1.30s/it] 28%|██▊ | 17520/61904 [8:51:13<16:22:05, 1.33s/it] {'loss': 2.6914, 'learning_rate': 1.7193050693634123e-07, 'epoch': 4.53} + 28%|██▊ | 17520/61904 [8:51:13<16:22:05, 1.33s/it] 28%|██▊ | 17521/61904 [8:51:15<16:56:34, 1.37s/it] 28%|██▊ | 17522/61904 [8:51:16<17:08:23, 1.39s/it] 28%|██▊ | 17523/61904 [8:51:17<16:30:09, 1.34s/it] 28%|██▊ | 17524/61904 [8:51:19<16:17:20, 1.32s/it] 28%|██▊ | 17525/61904 [8:51:20<16:22:01, 1.33s/it] 28%|██▊ | 17526/61904 [8:51:21<16:36:10, 1.35s/it] 28%|██▊ | 17527/61904 [8:51:23<16:41:34, 1.35s/it] 28%|██▊ | 17528/61904 [8:51:24<16:20:16, 1.33s/it] 28%|██▊ | 17529/61904 [8:51:25<16:31:14, 1.34s/it] 28%|██▊ | 17530/61904 [8:51:27<16:13:23, 1.32s/it] 28%|██▊ | 17531/61904 [8:51:28<16:12:05, 1.31s/it] 28%|██▊ | 17532/61904 [8:51:29<16:26:33, 1.33s/it] 28%|██▊ | 17533/61904 [8:51:31<16:53:28, 1.37s/it] 28%|██▊ | 17534/61904 [8:51:32<16:23:44, 1.33s/it] 28%|██▊ | 17535/61904 [8:51:33<16:39:23, 1.35s/it] 28%|██▊ | 17536/61904 [8:51:35<16:42:36, 1.36s/it] 28%|██▊ | 17537/61904 [8:51:36<17:50:45, 1.45s/it] 28%|██▊ | 17538/61904 [8:51:38<17:15:27, 1.40s/it] 28%|██▊ | 17539/61904 [8:51:39<17:32:33, 1.42s/it] 28%|██▊ | 17540/61904 [8:51:40<17:02:52, 1.38s/it] {'loss': 2.7076, 'learning_rate': 1.718980941267989e-07, 'epoch': 4.53} + 28%|██▊ | 17540/61904 [8:51:40<17:02:52, 1.38s/it] 28%|██▊ | 17541/61904 [8:51:42<16:30:58, 1.34s/it] 28%|██▊ | 17542/61904 [8:51:43<16:46:50, 1.36s/it] 28%|██▊ | 17543/61904 [8:51:44<16:26:41, 1.33s/it] 28%|██▊ | 17544/61904 [8:51:46<16:47:17, 1.36s/it] 28%|██▊ | 17545/61904 [8:51:47<17:22:42, 1.41s/it] 28%|██▊ | 17546/61904 [8:51:49<17:11:22, 1.40s/it] 28%|██▊ | 17547/61904 [8:51:50<16:28:43, 1.34s/it] 28%|██▊ | 17548/61904 [8:51:51<16:07:05, 1.31s/it] 28%|██▊ | 17549/61904 [8:51:52<16:00:03, 1.30s/it] 28%|██▊ | 17550/61904 [8:51:54<16:00:58, 1.30s/it] 28%|██▊ | 17551/61904 [8:51:55<16:07:41, 1.31s/it] 28%|██▊ | 17552/61904 [8:51:56<16:29:14, 1.34s/it] 28%|██▊ | 17553/61904 [8:51:58<16:34:34, 1.35s/it] 28%|██▊ | 17554/61904 [8:51:59<16:23:04, 1.33s/it] 28%|██▊ | 17555/61904 [8:52:00<16:13:47, 1.32s/it] 28%|██▊ | 17556/61904 [8:52:02<16:46:24, 1.36s/it] 28%|██▊ | 17557/61904 [8:52:03<17:15:52, 1.40s/it] 28%|██▊ | 17558/61904 [8:52:05<17:18:28, 1.41s/it] 28%|██▊ | 17559/61904 [8:52:06<17:28:24, 1.42s/it] 28%|██▊ | 17560/61904 [8:52:08<17:06:48, 1.39s/it] {'loss': 2.6867, 'learning_rate': 1.7186568131725658e-07, 'epoch': 4.54} + 28%|██▊ | 17560/61904 [8:52:08<17:06:48, 1.39s/it] 28%|██▊ | 17561/61904 [8:52:09<17:04:07, 1.39s/it] 28%|██▊ | 17562/61904 [8:52:10<17:16:08, 1.40s/it] 28%|██▊ | 17563/61904 [8:52:12<17:00:25, 1.38s/it] 28%|██▊ | 17564/61904 [8:52:13<16:40:50, 1.35s/it] 28%|██▊ | 17565/61904 [8:52:14<16:50:26, 1.37s/it] 28%|██▊ | 17566/61904 [8:52:16<16:41:21, 1.36s/it] 28%|██▊ | 17567/61904 [8:52:17<17:24:37, 1.41s/it] 28%|██▊ | 17568/61904 [8:52:19<17:19:35, 1.41s/it] 28%|██▊ | 17569/61904 [8:52:20<17:12:18, 1.40s/it] 28%|██▊ | 17570/61904 [8:52:21<16:54:15, 1.37s/it] 28%|██▊ | 17571/61904 [8:52:23<16:25:40, 1.33s/it] 28%|██▊ | 17572/61904 [8:52:24<16:16:33, 1.32s/it] 28%|██▊ | 17573/61904 [8:52:25<16:59:11, 1.38s/it] 28%|██▊ | 17574/61904 [8:52:27<17:22:20, 1.41s/it] 28%|██▊ | 17575/61904 [8:52:28<17:35:30, 1.43s/it] 28%|██▊ | 17576/61904 [8:52:30<17:35:03, 1.43s/it] 28%|██▊ | 17577/61904 [8:52:31<17:28:43, 1.42s/it] 28%|██▊ | 17578/61904 [8:52:32<17:04:23, 1.39s/it] 28%|��█▊ | 17579/61904 [8:52:34<16:55:39, 1.37s/it] 28%|██▊ | 17580/61904 [8:52:35<16:46:21, 1.36s/it] {'loss': 2.7301, 'learning_rate': 1.7183326850771424e-07, 'epoch': 4.54} + 28%|██▊ | 17580/61904 [8:52:35<16:46:21, 1.36s/it] 28%|██▊ | 17581/61904 [8:52:37<17:02:07, 1.38s/it] 28%|██▊ | 17582/61904 [8:52:38<16:52:21, 1.37s/it] 28%|██▊ | 17583/61904 [8:52:39<16:59:24, 1.38s/it] 28%|██▊ | 17584/61904 [8:52:41<17:24:32, 1.41s/it] 28%|██▊ | 17585/61904 [8:52:42<16:57:25, 1.38s/it] 28%|██▊ | 17586/61904 [8:52:43<16:59:12, 1.38s/it] 28%|██▊ | 17587/61904 [8:52:45<16:46:14, 1.36s/it] 28%|██▊ | 17588/61904 [8:52:46<16:56:33, 1.38s/it] 28%|██▊ | 17589/61904 [8:52:48<17:03:28, 1.39s/it] 28%|██▊ | 17590/61904 [8:52:49<16:44:35, 1.36s/it] 28%|██▊ | 17591/61904 [8:52:50<17:11:54, 1.40s/it] 28%|██▊ | 17592/61904 [8:52:52<17:11:50, 1.40s/it] 28%|██▊ | 17593/61904 [8:52:53<16:58:39, 1.38s/it] 28%|██▊ | 17594/61904 [8:52:54<16:42:20, 1.36s/it] 28%|██▊ | 17595/61904 [8:52:56<16:55:38, 1.38s/it] 28%|██▊ | 17596/61904 [8:52:57<16:52:03, 1.37s/it] 28%|██▊ | 17597/61904 [8:52:59<16:39:11, 1.35s/it] 28%|██▊ | 17598/61904 [8:53:00<16:28:32, 1.34s/it] 28%|██▊ | 17599/61904 [8:53:01<16:13:18, 1.32s/it] 28%|██▊ | 17600/61904 [8:53:02<16:32:13, 1.34s/it] {'loss': 2.6684, 'learning_rate': 1.718008556981719e-07, 'epoch': 4.55} + 28%|██▊ | 17600/61904 [8:53:02<16:32:13, 1.34s/it] 28%|██▊ | 17601/61904 [8:53:04<16:08:01, 1.31s/it] 28%|██▊ | 17602/61904 [8:53:05<16:19:11, 1.33s/it] 28%|██▊ | 17603/61904 [8:53:07<16:40:21, 1.35s/it] 28%|██▊ | 17604/61904 [8:53:08<16:31:11, 1.34s/it] 28%|██▊ | 17605/61904 [8:53:09<16:36:24, 1.35s/it] 28%|██▊ | 17606/61904 [8:53:11<16:45:18, 1.36s/it] 28%|██▊ | 17607/61904 [8:53:12<16:33:25, 1.35s/it] 28%|██▊ | 17608/61904 [8:53:13<16:46:35, 1.36s/it] 28%|██▊ | 17609/61904 [8:53:15<16:54:23, 1.37s/it] 28%|██▊ | 17610/61904 [8:53:16<16:55:11, 1.38s/it] 28%|██▊ | 17611/61904 [8:53:17<16:36:15, 1.35s/it] 28%|██▊ | 17612/61904 [8:53:19<16:36:17, 1.35s/it] 28%|██▊ | 17613/61904 [8:53:20<16:21:24, 1.33s/it] 28%|██▊ | 17614/61904 [8:53:22<17:09:44, 1.39s/it] 28%|██▊ | 17615/61904 [8:53:23<16:38:31, 1.35s/it] 28%|██▊ | 17616/61904 [8:53:24<16:42:40, 1.36s/it] 28%|██▊ | 17617/61904 [8:53:25<16:17:23, 1.32s/it] 28%|██▊ | 17618/61904 [8:53:27<16:23:19, 1.33s/it] 28%|██▊ | 17619/61904 [8:53:28<16:16:54, 1.32s/it] 28%|██▊ | 17620/61904 [8:53:29<16:37:03, 1.35s/it] {'loss': 2.7038, 'learning_rate': 1.7176844288862957e-07, 'epoch': 4.55} + 28%|██▊ | 17620/61904 [8:53:29<16:37:03, 1.35s/it] 28%|██▊ | 17621/61904 [8:53:31<16:38:18, 1.35s/it] 28%|██▊ | 17622/61904 [8:53:32<16:37:40, 1.35s/it] 28%|██▊ | 17623/61904 [8:53:34<16:57:05, 1.38s/it] 28%|██▊ | 17624/61904 [8:53:35<16:56:54, 1.38s/it] 28%|██▊ | 17625/61904 [8:53:36<17:14:01, 1.40s/it] 28%|██▊ | 17626/61904 [8:53:38<16:55:44, 1.38s/it] 28%|██▊ | 17627/61904 [8:53:39<16:54:28, 1.37s/it] 28%|██▊ | 17628/61904 [8:53:40<16:33:14, 1.35s/it] 28%|██▊ | 17629/61904 [8:53:42<16:29:06, 1.34s/it] 28%|██▊ | 17630/61904 [8:53:43<16:23:30, 1.33s/it] 28%|██▊ | 17631/61904 [8:53:44<16:15:10, 1.32s/it] 28%|██▊ | 17632/61904 [8:53:46<16:30:40, 1.34s/it] 28%|██▊ | 17633/61904 [8:53:47<16:33:08, 1.35s/it] 28%|██▊ | 17634/61904 [8:53:48<16:23:48, 1.33s/it] 28%|██▊ | 17635/61904 [8:53:50<17:14:37, 1.40s/it] 28%|██▊ | 17636/61904 [8:53:51<16:43:50, 1.36s/it] 28%|██▊ | 17637/61904 [8:53:53<16:34:53, 1.35s/it] 28%|██▊ | 17638/61904 [8:53:54<16:22:31, 1.33s/it] 28%|██▊ | 17639/61904 [8:53:55<16:37:57, 1.35s/it] 28%|██▊ | 17640/61904 [8:53:57<16:57:41, 1.38s/it] {'loss': 2.6915, 'learning_rate': 1.7173603007908723e-07, 'epoch': 4.56} + 28%|██▊ | 17640/61904 [8:53:57<16:57:41, 1.38s/it] 28%|██▊ | 17641/61904 [8:53:58<17:47:44, 1.45s/it] 28%|██▊ | 17642/61904 [8:54:00<17:31:29, 1.43s/it] 29%|██▊ | 17643/61904 [8:54:01<17:13:03, 1.40s/it] 29%|██▊ | 17644/61904 [8:54:02<16:41:46, 1.36s/it] 29%|██▊ | 17645/61904 [8:54:04<17:05:57, 1.39s/it] 29%|██▊ | 17646/61904 [8:54:05<17:18:04, 1.41s/it] 29%|██▊ | 17647/61904 [8:54:07<17:05:38, 1.39s/it] 29%|██▊ | 17648/61904 [8:54:08<17:10:04, 1.40s/it] 29%|██▊ | 17649/61904 [8:54:09<17:33:57, 1.43s/it] 29%|██▊ | 17650/61904 [8:54:11<16:58:40, 1.38s/it] 29%|██▊ | 17651/61904 [8:54:12<17:19:17, 1.41s/it] 29%|██▊ | 17652/61904 [8:54:14<17:58:01, 1.46s/it] 29%|██▊ | 17653/61904 [8:54:15<17:18:07, 1.41s/it] 29%|██▊ | 17654/61904 [8:54:16<17:10:21, 1.40s/it] 29%|██▊ | 17655/61904 [8:54:18<17:16:31, 1.41s/it] 29%|██▊ | 17656/61904 [8:54:19<17:15:47, 1.40s/it] 29%|██▊ | 17657/61904 [8:54:21<16:57:20, 1.38s/it] 29%|██▊ | 17658/61904 [8:54:22<16:31:03, 1.34s/it] 29%|██▊ | 17659/61904 [8:54:23<16:30:00, 1.34s/it] 29%|██▊ | 17660/61904 [8:54:25<16:50:35, 1.37s/it] {'loss': 2.6953, 'learning_rate': 1.7170361726954492e-07, 'epoch': 4.56} + 29%|██▊ | 17660/61904 [8:54:25<16:50:35, 1.37s/it] 29%|██▊ | 17661/61904 [8:54:26<16:53:03, 1.37s/it] 29%|██▊ | 17662/61904 [8:54:27<17:09:48, 1.40s/it] 29%|██▊ | 17663/61904 [8:54:29<17:40:38, 1.44s/it] 29%|██▊ | 17664/61904 [8:54:30<17:06:56, 1.39s/it] 29%|██▊ | 17665/61904 [8:54:32<16:53:42, 1.37s/it] 29%|██▊ | 17666/61904 [8:54:33<16:40:24, 1.36s/it] 29%|██▊ | 17667/61904 [8:54:34<16:48:19, 1.37s/it] 29%|██▊ | 17668/61904 [8:54:36<17:15:33, 1.40s/it] 29%|██▊ | 17669/61904 [8:54:37<16:57:46, 1.38s/it] 29%|██▊ | 17670/61904 [8:54:38<16:36:07, 1.35s/it] 29%|██▊ | 17671/61904 [8:54:40<17:01:56, 1.39s/it] 29%|██▊ | 17672/61904 [8:54:41<17:22:24, 1.41s/it] 29%|██▊ | 17673/61904 [8:54:43<17:10:45, 1.40s/it] 29%|██▊ | 17674/61904 [8:54:44<16:46:03, 1.36s/it] 29%|██▊ | 17675/61904 [8:54:45<17:05:28, 1.39s/it] 29%|██▊ | 17676/61904 [8:54:47<16:55:16, 1.38s/it] 29%|██▊ | 17677/61904 [8:54:48<16:39:53, 1.36s/it] 29%|██▊ | 17678/61904 [8:54:50<17:08:25, 1.40s/it] 29%|██▊ | 17679/61904 [8:54:51<16:39:13, 1.36s/it] 29%|██▊ | 17680/61904 [8:54:52<16:27:10, 1.34s/it] {'loss': 2.7303, 'learning_rate': 1.7167120446000258e-07, 'epoch': 4.57} + 29%|██▊ | 17680/61904 [8:54:52<16:27:10, 1.34s/it] 29%|██▊ | 17681/61904 [8:54:54<16:36:24, 1.35s/it] 29%|██▊ | 17682/61904 [8:54:55<17:19:08, 1.41s/it] 29%|██▊ | 17683/61904 [8:54:56<17:14:43, 1.40s/it] 29%|██▊ | 17684/61904 [8:54:58<17:17:27, 1.41s/it] 29%|██▊ | 17685/61904 [8:54:59<16:59:49, 1.38s/it] 29%|██▊ | 17686/61904 [8:55:01<16:47:47, 1.37s/it] 29%|██▊ | 17687/61904 [8:55:02<17:07:24, 1.39s/it] 29%|██▊ | 17688/61904 [8:55:03<17:15:07, 1.40s/it] 29%|██▊ | 17689/61904 [8:55:05<17:15:16, 1.40s/it] 29%|██▊ | 17690/61904 [8:55:06<17:31:16, 1.43s/it] 29%|██▊ | 17691/61904 [8:55:08<17:15:31, 1.41s/it] 29%|██▊ | 17692/61904 [8:55:09<16:52:54, 1.37s/it] 29%|██▊ | 17693/61904 [8:55:10<17:09:32, 1.40s/it] 29%|██▊ | 17694/61904 [8:55:12<17:20:48, 1.41s/it] 29%|██▊ | 17695/61904 [8:55:13<17:17:30, 1.41s/it] 29%|██▊ | 17696/61904 [8:55:15<17:53:19, 1.46s/it] 29%|██▊ | 17697/61904 [8:55:16<17:26:17, 1.42s/it] 29%|██▊ | 17698/61904 [8:55:18<17:23:14, 1.42s/it] 29%|██▊ | 17699/61904 [8:55:19<17:08:54, 1.40s/it] 29%|██▊ | 17700/61904 [8:55:20<16:51:09, 1.37s/it] {'loss': 2.6225, 'learning_rate': 1.7163879165046024e-07, 'epoch': 4.57} + 29%|██▊ | 17700/61904 [8:55:20<16:51:09, 1.37s/it] 29%|██▊ | 17701/61904 [8:55:22<16:49:14, 1.37s/it] 29%|██▊ | 17702/61904 [8:55:23<16:25:32, 1.34s/it] 29%|██▊ | 17703/61904 [8:55:25<17:59:16, 1.47s/it] 29%|██▊ | 17704/61904 [8:55:26<17:58:08, 1.46s/it] 29%|██▊ | 17705/61904 [8:55:28<17:54:37, 1.46s/it] 29%|██▊ | 17706/61904 [8:55:29<17:11:50, 1.40s/it] 29%|██▊ | 17707/61904 [8:55:31<18:41:01, 1.52s/it] 29%|██▊ | 17708/61904 [8:55:32<18:30:47, 1.51s/it] 29%|██▊ | 17709/61904 [8:55:33<18:01:15, 1.47s/it] 29%|██▊ | 17710/61904 [8:55:35<17:48:50, 1.45s/it] 29%|██▊ | 17711/61904 [8:55:36<17:31:32, 1.43s/it] 29%|██▊ | 17712/61904 [8:55:38<17:34:00, 1.43s/it] 29%|██▊ | 17713/61904 [8:55:39<17:13:38, 1.40s/it] 29%|██▊ | 17714/61904 [8:55:40<16:48:41, 1.37s/it] 29%|██▊ | 17715/61904 [8:55:42<16:55:44, 1.38s/it] 29%|██▊ | 17716/61904 [8:55:43<16:48:48, 1.37s/it] 29%|██▊ | 17717/61904 [8:55:45<17:16:05, 1.41s/it] 29%|██▊ | 17718/61904 [8:55:46<17:42:32, 1.44s/it] 29%|██▊ | 17719/61904 [8:55:47<17:31:13, 1.43s/it] 29%|██▊ | 17720/61904 [8:55:49<16:57:52, 1.38s/it] {'loss': 2.682, 'learning_rate': 1.7160637884091793e-07, 'epoch': 4.58} + 29%|██▊ | 17720/61904 [8:55:49<16:57:52, 1.38s/it] 29%|██▊ | 17721/61904 [8:55:50<18:00:58, 1.47s/it] 29%|██▊ | 17722/61904 [8:55:52<17:15:30, 1.41s/it] 29%|██▊ | 17723/61904 [8:55:53<16:39:19, 1.36s/it] 29%|██▊ | 17724/61904 [8:55:54<16:28:25, 1.34s/it] 29%|██▊ | 17725/61904 [8:55:56<16:26:03, 1.34s/it] 29%|██▊ | 17726/61904 [8:55:57<16:41:45, 1.36s/it] 29%|██▊ | 17727/61904 [8:55:59<17:29:15, 1.43s/it] 29%|██▊ | 17728/61904 [8:56:00<17:27:51, 1.42s/it] 29%|██▊ | 17729/61904 [8:56:01<17:26:09, 1.42s/it] 29%|██▊ | 17730/61904 [8:56:03<17:47:02, 1.45s/it] 29%|██▊ | 17731/61904 [8:56:04<17:54:03, 1.46s/it] 29%|██▊ | 17732/61904 [8:56:06<17:38:16, 1.44s/it] 29%|██▊ | 17733/61904 [8:56:07<17:05:30, 1.39s/it] 29%|██▊ | 17734/61904 [8:56:09<17:12:09, 1.40s/it] 29%|██▊ | 17735/61904 [8:56:10<17:08:53, 1.40s/it] 29%|██▊ | 17736/61904 [8:56:11<16:50:26, 1.37s/it] 29%|██▊ | 17737/61904 [8:56:12<16:23:25, 1.34s/it] 29%|██▊ | 17738/61904 [8:56:14<16:05:39, 1.31s/it] 29%|██▊ | 17739/61904 [8:56:15<16:46:59, 1.37s/it] 29%|██▊ | 17740/61904 [8:56:17<17:23:09, 1.42s/it] {'loss': 2.6886, 'learning_rate': 1.715739660313756e-07, 'epoch': 4.58} + 29%|██▊ | 17740/61904 [8:56:17<17:23:09, 1.42s/it] 29%|██▊ | 17741/61904 [8:56:18<17:18:50, 1.41s/it] 29%|██▊ | 17742/61904 [8:56:20<17:15:03, 1.41s/it] 29%|██▊ | 17743/61904 [8:56:21<16:58:32, 1.38s/it] 29%|██▊ | 17744/61904 [8:56:22<17:08:19, 1.40s/it] 29%|██▊ | 17745/61904 [8:56:24<17:20:15, 1.41s/it] 29%|██▊ | 17746/61904 [8:56:25<17:07:30, 1.40s/it] 29%|██▊ | 17747/61904 [8:56:26<16:51:12, 1.37s/it] 29%|██▊ | 17748/61904 [8:56:28<17:05:15, 1.39s/it] 29%|██▊ | 17749/61904 [8:56:29<17:17:52, 1.41s/it] 29%|██▊ | 17750/61904 [8:56:31<17:16:19, 1.41s/it] 29%|██▊ | 17751/61904 [8:56:32<17:03:55, 1.39s/it] 29%|██▊ | 17752/61904 [8:56:33<17:04:17, 1.39s/it] 29%|██▊ | 17753/61904 [8:56:35<17:45:36, 1.45s/it] 29%|██▊ | 17754/61904 [8:56:36<17:10:58, 1.40s/it] 29%|██▊ | 17755/61904 [8:56:38<17:00:02, 1.39s/it] 29%|██▊ | 17756/61904 [8:56:39<16:30:01, 1.35s/it] 29%|██▊ | 17757/61904 [8:56:40<16:47:17, 1.37s/it] 29%|██▊ | 17758/61904 [8:56:42<17:02:14, 1.39s/it] 29%|██▊ | 17759/61904 [8:56:43<17:21:37, 1.42s/it] 29%|██▊ | 17760/61904 [8:56:45<17:08:10, 1.40s/it] {'loss': 2.5928, 'learning_rate': 1.7154155322183325e-07, 'epoch': 4.59} + 29%|██▊ | 17760/61904 [8:56:45<17:08:10, 1.40s/it] 29%|██▊ | 17761/61904 [8:56:46<16:54:50, 1.38s/it] 29%|██▊ | 17762/61904 [8:56:47<16:25:18, 1.34s/it] 29%|██▊ | 17763/61904 [8:56:49<16:19:36, 1.33s/it] 29%|██▊ | 17764/61904 [8:56:50<16:10:36, 1.32s/it] 29%|██▊ | 17765/61904 [8:56:51<16:32:38, 1.35s/it] 29%|██▊ | 17766/61904 [8:56:53<16:54:24, 1.38s/it] 29%|██▊ | 17767/61904 [8:56:54<16:53:40, 1.38s/it] 29%|██▊ | 17768/61904 [8:56:55<16:45:11, 1.37s/it] 29%|██▊ | 17769/61904 [8:56:57<16:32:00, 1.35s/it] 29%|██▊ | 17770/61904 [8:56:58<16:04:07, 1.31s/it] 29%|██▊ | 17771/61904 [8:56:59<16:04:01, 1.31s/it] 29%|██▊ | 17772/61904 [8:57:01<16:14:19, 1.32s/it] 29%|██▊ | 17773/61904 [8:57:02<16:32:56, 1.35s/it] 29%|██▊ | 17774/61904 [8:57:04<17:08:12, 1.40s/it] 29%|██▊ | 17775/61904 [8:57:05<16:59:45, 1.39s/it] 29%|██▊ | 17776/61904 [8:57:06<16:59:58, 1.39s/it] 29%|██▊ | 17777/61904 [8:57:08<16:49:47, 1.37s/it] 29%|██▊ | 17778/61904 [8:57:09<17:04:20, 1.39s/it] 29%|██▊ | 17779/61904 [8:57:10<17:07:49, 1.40s/it] 29%|██▊ | 17780/61904 [8:57:12<16:58:46, 1.39s/it] {'loss': 2.7206, 'learning_rate': 1.7150914041229094e-07, 'epoch': 4.59} + 29%|██▊ | 17780/61904 [8:57:12<16:58:46, 1.39s/it] 29%|██▊ | 17781/61904 [8:57:13<16:41:47, 1.36s/it] 29%|██▊ | 17782/61904 [8:57:14<16:30:42, 1.35s/it] 29%|██▊ | 17783/61904 [8:57:16<16:06:19, 1.31s/it] 29%|██▊ | 17784/61904 [8:57:17<15:59:59, 1.31s/it] 29%|██▊ | 17785/61904 [8:57:18<16:30:36, 1.35s/it] 29%|██▊ | 17786/61904 [8:57:20<16:30:07, 1.35s/it] 29%|██▊ | 17787/61904 [8:57:21<16:53:04, 1.38s/it] 29%|██▊ | 17788/61904 [8:57:22<16:31:50, 1.35s/it] 29%|██▊ | 17789/61904 [8:57:24<16:39:30, 1.36s/it] 29%|██▊ | 17790/61904 [8:57:25<16:14:49, 1.33s/it] 29%|██▊ | 17791/61904 [8:57:27<16:36:16, 1.36s/it] 29%|██▊ | 17792/61904 [8:57:28<17:34:18, 1.43s/it] 29%|██▊ | 17793/61904 [8:57:30<17:24:53, 1.42s/it] 29%|██▊ | 17794/61904 [8:57:31<17:29:49, 1.43s/it] 29%|██▊ | 17795/61904 [8:57:32<17:11:16, 1.40s/it] 29%|██▊ | 17796/61904 [8:57:34<17:11:59, 1.40s/it] 29%|██▊ | 17797/61904 [8:57:35<16:43:11, 1.36s/it] 29%|██▉ | 17798/61904 [8:57:36<16:16:07, 1.33s/it] 29%|██▉ | 17799/61904 [8:57:37<16:00:17, 1.31s/it] 29%|██▉ | 17800/61904 [8:57:39<16:25:54, 1.34s/it] {'loss': 2.7234, 'learning_rate': 1.714767276027486e-07, 'epoch': 4.6} + 29%|██▉ | 17800/61904 [8:57:39<16:25:54, 1.34s/it] 29%|██▉ | 17801/61904 [8:57:40<17:07:17, 1.40s/it] 29%|██▉ | 17802/61904 [8:57:42<17:30:03, 1.43s/it] 29%|██▉ | 17803/61904 [8:57:43<17:47:15, 1.45s/it] 29%|██▉ | 17804/61904 [8:57:45<17:18:11, 1.41s/it] 29%|██▉ | 17805/61904 [8:57:46<16:51:14, 1.38s/it] 29%|██▉ | 17806/61904 [8:57:47<16:36:47, 1.36s/it] 29%|██▉ | 17807/61904 [8:57:49<16:25:44, 1.34s/it] 29%|██▉ | 17808/61904 [8:57:50<16:24:44, 1.34s/it] 29%|██▉ | 17809/61904 [8:57:51<16:21:39, 1.34s/it] 29%|██▉ | 17810/61904 [8:57:53<16:35:22, 1.35s/it] 29%|██▉ | 17811/61904 [8:57:54<16:32:58, 1.35s/it] 29%|██▉ | 17812/61904 [8:57:55<16:21:33, 1.34s/it] 29%|██▉ | 17813/61904 [8:57:57<16:26:42, 1.34s/it] 29%|██▉ | 17814/61904 [8:57:58<16:29:14, 1.35s/it] 29%|██▉ | 17815/61904 [8:58:00<16:46:08, 1.37s/it] 29%|██▉ | 17816/61904 [8:58:01<16:34:23, 1.35s/it] 29%|██▉ | 17817/61904 [8:58:02<17:10:43, 1.40s/it] 29%|██▉ | 17818/61904 [8:58:04<17:27:38, 1.43s/it] 29%|██▉ | 17819/61904 [8:58:05<17:02:40, 1.39s/it] 29%|██▉ | 17820/61904 [8:58:06<16:49:05, 1.37s/it] {'loss': 2.6776, 'learning_rate': 1.7144431479320626e-07, 'epoch': 4.61} + 29%|██▉ | 17820/61904 [8:58:06<16:49:05, 1.37s/it] 29%|██▉ | 17821/61904 [8:58:08<17:09:28, 1.40s/it] 29%|██▉ | 17822/61904 [8:58:09<16:48:07, 1.37s/it] 29%|██▉ | 17823/61904 [8:58:11<17:05:24, 1.40s/it] 29%|██▉ | 17824/61904 [8:58:12<16:59:09, 1.39s/it] 29%|██▉ | 17825/61904 [8:58:13<16:47:45, 1.37s/it] 29%|██▉ | 17826/61904 [8:58:15<16:46:31, 1.37s/it] 29%|██▉ | 17827/61904 [8:58:16<16:40:32, 1.36s/it] 29%|██▉ | 17828/61904 [8:58:17<16:19:15, 1.33s/it] 29%|██▉ | 17829/61904 [8:58:19<16:31:48, 1.35s/it] 29%|██▉ | 17830/61904 [8:58:20<17:28:52, 1.43s/it] 29%|██▉ | 17831/61904 [8:58:22<16:38:21, 1.36s/it] 29%|██▉ | 17832/61904 [8:58:23<16:35:26, 1.36s/it] 29%|██▉ | 17833/61904 [8:58:24<16:37:37, 1.36s/it] 29%|██▉ | 17834/61904 [8:58:26<16:42:39, 1.37s/it] 29%|██▉ | 17835/61904 [8:58:27<16:26:18, 1.34s/it] 29%|██▉ | 17836/61904 [8:58:28<16:33:03, 1.35s/it] 29%|██▉ | 17837/61904 [8:58:30<16:23:22, 1.34s/it] 29%|██▉ | 17838/61904 [8:58:31<16:50:33, 1.38s/it] 29%|██▉ | 17839/61904 [8:58:33<17:01:56, 1.39s/it] 29%|██▉ | 17840/61904 [8:58:34<17:00:16, 1.39s/it] {'loss': 2.7249, 'learning_rate': 1.7141190198366393e-07, 'epoch': 4.61} + 29%|██▉ | 17840/61904 [8:58:34<17:00:16, 1.39s/it] 29%|██▉ | 17841/61904 [8:58:35<17:05:31, 1.40s/it] 29%|██▉ | 17842/61904 [8:58:37<17:34:11, 1.44s/it] 29%|██▉ | 17843/61904 [8:58:38<17:44:04, 1.45s/it] 29%|██▉ | 17844/61904 [8:58:40<17:35:03, 1.44s/it] 29%|██▉ | 17845/61904 [8:58:41<17:08:37, 1.40s/it] 29%|██▉ | 17846/61904 [8:58:43<17:22:50, 1.42s/it] 29%|██▉ | 17847/61904 [8:58:44<17:11:58, 1.41s/it] 29%|██▉ | 17848/61904 [8:58:45<16:58:11, 1.39s/it] 29%|██▉ | 17849/61904 [8:58:47<17:20:34, 1.42s/it] 29%|██▉ | 17850/61904 [8:58:48<16:52:37, 1.38s/it] 29%|██▉ | 17851/61904 [8:58:49<16:59:49, 1.39s/it] 29%|██▉ | 17852/61904 [8:58:51<17:09:30, 1.40s/it] 29%|██▉ | 17853/61904 [8:58:52<16:49:23, 1.37s/it] 29%|██▉ | 17854/61904 [8:58:54<17:45:08, 1.45s/it] 29%|██▉ | 17855/61904 [8:58:55<17:38:06, 1.44s/it] 29%|██▉ | 17856/61904 [8:58:57<17:18:46, 1.41s/it] 29%|██▉ | 17857/61904 [8:58:58<16:51:45, 1.38s/it] 29%|██▉ | 17858/61904 [8:58:59<16:39:35, 1.36s/it] 29%|██▉ | 17859/61904 [8:59:01<16:42:57, 1.37s/it] 29%|██▉ | 17860/61904 [8:59:02<17:13:50, 1.41s/it] {'loss': 2.7109, 'learning_rate': 1.713794891741216e-07, 'epoch': 4.62} + 29%|██▉ | 17860/61904 [8:59:02<17:13:50, 1.41s/it] 29%|██▉ | 17861/61904 [8:59:03<17:02:53, 1.39s/it] 29%|██▉ | 17862/61904 [8:59:05<16:34:37, 1.36s/it] 29%|██▉ | 17863/61904 [8:59:06<16:50:34, 1.38s/it] 29%|██▉ | 17864/61904 [8:59:07<16:48:55, 1.37s/it] 29%|██▉ | 17865/61904 [8:59:09<17:21:27, 1.42s/it] 29%|██▉ | 17866/61904 [8:59:10<16:57:13, 1.39s/it] 29%|██▉ | 17867/61904 [8:59:12<16:51:48, 1.38s/it] 29%|██▉ | 17868/61904 [8:59:13<16:30:29, 1.35s/it] 29%|██▉ | 17869/61904 [8:59:14<16:18:39, 1.33s/it] 29%|██▉ | 17870/61904 [8:59:16<16:08:17, 1.32s/it] 29%|██▉ | 17871/61904 [8:59:17<16:26:30, 1.34s/it] 29%|██▉ | 17872/61904 [8:59:18<16:15:03, 1.33s/it] 29%|██▉ | 17873/61904 [8:59:20<15:58:04, 1.31s/it] 29%|██▉ | 17874/61904 [8:59:21<16:12:50, 1.33s/it] 29%|██▉ | 17875/61904 [8:59:22<15:53:38, 1.30s/it] 29%|██▉ | 17876/61904 [8:59:23<16:07:42, 1.32s/it] 29%|██▉ | 17877/61904 [8:59:25<16:25:56, 1.34s/it] 29%|██▉ | 17878/61904 [8:59:26<15:57:52, 1.31s/it] 29%|██▉ | 17879/61904 [8:59:27<15:58:34, 1.31s/it] 29%|██▉ | 17880/61904 [8:59:29<16:14:40, 1.33s/it] {'loss': 2.7566, 'learning_rate': 1.7134707636457928e-07, 'epoch': 4.62} + 29%|██▉ | 17880/61904 [8:59:29<16:14:40, 1.33s/it] 29%|██▉ | 17881/61904 [8:59:30<16:09:11, 1.32s/it] 29%|██▉ | 17882/61904 [8:59:31<16:16:23, 1.33s/it] 29%|██▉ | 17883/61904 [8:59:33<16:42:26, 1.37s/it] 29%|██▉ | 17884/61904 [8:59:34<16:34:32, 1.36s/it] 29%|██▉ | 17885/61904 [8:59:36<17:13:27, 1.41s/it] 29%|██▉ | 17886/61904 [8:59:37<16:43:55, 1.37s/it] 29%|██▉ | 17887/61904 [8:59:38<16:46:56, 1.37s/it] 29%|██▉ | 17888/61904 [8:59:40<16:09:21, 1.32s/it] 29%|██▉ | 17889/61904 [8:59:41<16:23:57, 1.34s/it] 29%|██▉ | 17890/61904 [8:59:42<16:30:16, 1.35s/it] 29%|██▉ | 17891/61904 [8:59:44<16:12:38, 1.33s/it] 29%|██▉ | 17892/61904 [8:59:45<16:59:55, 1.39s/it] 29%|██▉ | 17893/61904 [8:59:47<16:55:42, 1.38s/it] 29%|██▉ | 17894/61904 [8:59:48<16:26:35, 1.35s/it] 29%|██▉ | 17895/61904 [8:59:49<16:17:44, 1.33s/it] 29%|██▉ | 17896/61904 [8:59:50<16:29:58, 1.35s/it] 29%|██▉ | 17897/61904 [8:59:52<17:07:21, 1.40s/it] 29%|██▉ | 17898/61904 [8:59:53<16:58:49, 1.39s/it] 29%|██▉ | 17899/61904 [8:59:55<16:31:20, 1.35s/it] 29%|██▉ | 17900/61904 [8:59:56<16:49:15, 1.38s/it] {'loss': 2.6878, 'learning_rate': 1.7131466355503694e-07, 'epoch': 4.63} + 29%|██▉ | 17900/61904 [8:59:56<16:49:15, 1.38s/it] 29%|██▉ | 17901/61904 [8:59:57<16:26:31, 1.35s/it] 29%|██▉ | 17902/61904 [8:59:59<16:55:40, 1.38s/it] 29%|██▉ | 17903/61904 [9:00:00<16:21:49, 1.34s/it] 29%|██▉ | 17904/61904 [9:00:01<16:24:52, 1.34s/it] 29%|██▉ | 17905/61904 [9:00:03<16:00:21, 1.31s/it] 29%|██▉ | 17906/61904 [9:00:04<16:30:03, 1.35s/it] 29%|██▉ | 17907/61904 [9:00:06<16:47:34, 1.37s/it] 29%|██▉ | 17908/61904 [9:00:07<16:41:46, 1.37s/it] 29%|██▉ | 17909/61904 [9:00:08<16:57:45, 1.39s/it] 29%|██▉ | 17910/61904 [9:00:10<16:30:07, 1.35s/it] 29%|██▉ | 17911/61904 [9:00:11<16:09:18, 1.32s/it] 29%|██▉ | 17912/61904 [9:00:12<16:02:56, 1.31s/it] 29%|██▉ | 17913/61904 [9:00:13<15:57:30, 1.31s/it] 29%|██▉ | 17914/61904 [9:00:15<16:05:03, 1.32s/it] 29%|██▉ | 17915/61904 [9:00:16<16:53:58, 1.38s/it] 29%|██▉ | 17916/61904 [9:00:18<17:20:26, 1.42s/it] 29%|██▉ | 17917/61904 [9:00:19<17:01:53, 1.39s/it] 29%|██▉ | 17918/61904 [9:00:20<16:41:42, 1.37s/it] 29%|██▉ | 17919/61904 [9:00:22<16:42:04, 1.37s/it] 29%|██▉ | 17920/61904 [9:00:23<16:16:54, 1.33s/it] {'loss': 2.7529, 'learning_rate': 1.712822507454946e-07, 'epoch': 4.63} + 29%|██▉ | 17920/61904 [9:00:23<16:16:54, 1.33s/it] 29%|██▉ | 17921/61904 [9:00:24<16:02:05, 1.31s/it] 29%|██▉ | 17922/61904 [9:00:26<16:24:50, 1.34s/it] 29%|██▉ | 17923/61904 [9:00:27<16:44:58, 1.37s/it] 29%|██▉ | 17924/61904 [9:00:28<16:34:53, 1.36s/it] 29%|██▉ | 17925/61904 [9:00:30<17:06:29, 1.40s/it] 29%|██▉ | 17926/61904 [9:00:31<16:37:52, 1.36s/it] 29%|██▉ | 17927/61904 [9:00:33<16:19:39, 1.34s/it] 29%|██▉ | 17928/61904 [9:00:34<16:18:53, 1.34s/it] 29%|██▉ | 17929/61904 [9:00:35<16:26:54, 1.35s/it] 29%|██▉ | 17930/61904 [9:00:37<16:40:27, 1.37s/it] 29%|██▉ | 17931/61904 [9:00:38<16:44:37, 1.37s/it] 29%|██▉ | 17932/61904 [9:00:39<16:49:39, 1.38s/it] 29%|██▉ | 17933/61904 [9:00:41<16:44:00, 1.37s/it] 29%|██▉ | 17934/61904 [9:00:42<16:50:11, 1.38s/it] 29%|██▉ | 17935/61904 [9:00:44<16:43:23, 1.37s/it] 29%|██▉ | 17936/61904 [9:00:45<16:39:08, 1.36s/it] 29%|██▉ | 17937/61904 [9:00:46<16:33:29, 1.36s/it] 29%|██▉ | 17938/61904 [9:00:48<16:45:20, 1.37s/it] 29%|██▉ | 17939/61904 [9:00:49<16:59:08, 1.39s/it] 29%|██▉ | 17940/61904 [9:00:50<16:48:35, 1.38s/it] {'loss': 2.5671, 'learning_rate': 1.712498379359523e-07, 'epoch': 4.64} + 29%|██▉ | 17940/61904 [9:00:50<16:48:35, 1.38s/it] 29%|██▉ | 17941/61904 [9:00:52<17:04:20, 1.40s/it] 29%|██▉ | 17942/61904 [9:00:53<17:02:18, 1.40s/it] 29%|██▉ | 17943/61904 [9:00:55<16:55:40, 1.39s/it] 29%|██▉ | 17944/61904 [9:00:56<16:33:13, 1.36s/it] 29%|██▉ | 17945/61904 [9:00:57<16:58:59, 1.39s/it] 29%|██▉ | 17946/61904 [9:00:59<17:14:03, 1.41s/it] 29%|██▉ | 17947/61904 [9:01:00<17:05:48, 1.40s/it] 29%|██▉ | 17948/61904 [9:01:02<17:13:32, 1.41s/it] 29%|██▉ | 17949/61904 [9:01:03<17:21:27, 1.42s/it] 29%|██▉ | 17950/61904 [9:01:04<16:58:07, 1.39s/it] 29%|██▉ | 17951/61904 [9:01:06<16:51:09, 1.38s/it] 29%|██▉ | 17952/61904 [9:01:07<16:41:36, 1.37s/it] 29%|██▉ | 17953/61904 [9:01:08<16:27:27, 1.35s/it] 29%|██▉ | 17954/61904 [9:01:10<16:55:15, 1.39s/it] 29%|██▉ | 17955/61904 [9:01:11<16:58:44, 1.39s/it] 29%|██▉ | 17956/61904 [9:01:13<17:10:38, 1.41s/it] 29%|██▉ | 17957/61904 [9:01:14<17:50:13, 1.46s/it] 29%|██▉ | 17958/61904 [9:01:16<17:07:37, 1.40s/it] 29%|██▉ | 17959/61904 [9:01:17<17:15:37, 1.41s/it] 29%|██▉ | 17960/61904 [9:01:18<17:14:59, 1.41s/it] {'loss': 2.7142, 'learning_rate': 1.7121742512640995e-07, 'epoch': 4.64} + 29%|██▉ | 17960/61904 [9:01:18<17:14:59, 1.41s/it] 29%|██▉ | 17961/61904 [9:01:20<16:46:53, 1.37s/it] 29%|██▉ | 17962/61904 [9:01:21<16:44:28, 1.37s/it] 29%|██▉ | 17963/61904 [9:01:22<16:29:47, 1.35s/it] 29%|██▉ | 17964/61904 [9:01:24<16:23:17, 1.34s/it] 29%|██▉ | 17965/61904 [9:01:25<16:27:56, 1.35s/it] 29%|██▉ | 17966/61904 [9:01:27<16:54:30, 1.39s/it] 29%|██▉ | 17967/61904 [9:01:28<17:11:18, 1.41s/it] 29%|██▉ | 17968/61904 [9:01:29<17:20:07, 1.42s/it] 29%|██▉ | 17969/61904 [9:01:31<17:00:32, 1.39s/it] 29%|██▉ | 17970/61904 [9:01:32<16:37:17, 1.36s/it] 29%|██▉ | 17971/61904 [9:01:33<16:37:30, 1.36s/it] 29%|██▉ | 17972/61904 [9:01:35<16:34:12, 1.36s/it] 29%|██▉ | 17973/61904 [9:01:36<16:23:39, 1.34s/it] 29%|██▉ | 17974/61904 [9:01:37<16:22:09, 1.34s/it] 29%|██▉ | 17975/61904 [9:01:39<16:36:17, 1.36s/it] 29%|██▉ | 17976/61904 [9:01:40<16:46:00, 1.37s/it] 29%|██▉ | 17977/61904 [9:01:42<16:25:59, 1.35s/it] 29%|██▉ | 17978/61904 [9:01:43<16:00:01, 1.31s/it] 29%|██▉ | 17979/61904 [9:01:44<15:42:21, 1.29s/it] 29%|██▉ | 17980/61904 [9:01:45<15:52:24, 1.30s/it] {'loss': 2.6711, 'learning_rate': 1.711850123168676e-07, 'epoch': 4.65} + 29%|██▉ | 17980/61904 [9:01:45<15:52:24, 1.30s/it] 29%|██▉ | 17981/61904 [9:01:47<16:16:46, 1.33s/it] 29%|██▉ | 17982/61904 [9:01:48<16:38:47, 1.36s/it] 29%|██▉ | 17983/61904 [9:01:50<16:49:50, 1.38s/it] 29%|██▉ | 17984/61904 [9:01:51<16:43:28, 1.37s/it] 29%|██▉ | 17985/61904 [9:01:52<16:41:16, 1.37s/it] 29%|██▉ | 17986/61904 [9:01:54<16:45:44, 1.37s/it] 29%|██▉ | 17987/61904 [9:01:55<17:07:35, 1.40s/it] 29%|██▉ | 17988/61904 [9:01:56<16:25:15, 1.35s/it] 29%|██▉ | 17989/61904 [9:01:58<16:26:35, 1.35s/it] 29%|██▉ | 17990/61904 [9:01:59<15:56:46, 1.31s/it] 29%|██▉ | 17991/61904 [9:02:00<16:47:45, 1.38s/it] 29%|██▉ | 17992/61904 [9:02:02<16:31:17, 1.35s/it] 29%|██▉ | 17993/61904 [9:02:03<16:27:40, 1.35s/it] 29%|██▉ | 17994/61904 [9:02:04<16:21:15, 1.34s/it] 29%|██▉ | 17995/61904 [9:02:06<16:44:13, 1.37s/it] 29%|██▉ | 17996/61904 [9:02:07<16:55:01, 1.39s/it] 29%|██▉ | 17997/61904 [9:02:09<17:33:22, 1.44s/it] 29%|██▉ | 17998/61904 [9:02:10<17:09:16, 1.41s/it] 29%|██▉ | 17999/61904 [9:02:11<16:37:39, 1.36s/it] 29%|██▉ | 18000/61904 [9:02:13<16:28:08, 1.35s/it] {'loss': 2.7033, 'learning_rate': 1.7115259950732527e-07, 'epoch': 4.65} + 29%|██▉ | 18000/61904 [9:02:13<16:28:08, 1.35s/it] 29%|██▉ | 18001/61904 [9:02:14<16:57:52, 1.39s/it] 29%|██▉ | 18002/61904 [9:02:16<16:30:30, 1.35s/it] 29%|██▉ | 18003/61904 [9:02:17<16:23:55, 1.34s/it] 29%|██▉ | 18004/61904 [9:02:18<16:36:15, 1.36s/it] 29%|██▉ | 18005/61904 [9:02:20<16:56:36, 1.39s/it] 29%|██▉ | 18006/61904 [9:02:21<16:35:03, 1.36s/it] 29%|██▉ | 18007/61904 [9:02:22<16:36:14, 1.36s/it] 29%|██▉ | 18008/61904 [9:02:24<16:27:39, 1.35s/it] 29%|██▉ | 18009/61904 [9:02:25<16:56:02, 1.39s/it] 29%|██▉ | 18010/61904 [9:02:27<16:50:32, 1.38s/it] 29%|██▉ | 18011/61904 [9:02:28<16:46:16, 1.38s/it] 29%|██▉ | 18012/61904 [9:02:29<16:17:41, 1.34s/it] 29%|██▉ | 18013/61904 [9:02:31<16:41:51, 1.37s/it] 29%|██▉ | 18014/61904 [9:02:32<16:33:59, 1.36s/it] 29%|██▉ | 18015/61904 [9:02:33<16:35:10, 1.36s/it] 29%|██▉ | 18016/61904 [9:02:35<16:08:28, 1.32s/it] 29%|██▉ | 18017/61904 [9:02:36<16:28:01, 1.35s/it] 29%|██▉ | 18018/61904 [9:02:37<16:06:37, 1.32s/it] 29%|██▉ | 18019/61904 [9:02:39<16:34:26, 1.36s/it] 29%|██▉ | 18020/61904 [9:02:40<16:44:47, 1.37s/it] {'loss': 2.639, 'learning_rate': 1.7112018669778294e-07, 'epoch': 4.66} + 29%|██▉ | 18020/61904 [9:02:40<16:44:47, 1.37s/it] 29%|██▉ | 18021/61904 [9:02:41<17:00:29, 1.40s/it] 29%|██▉ | 18022/61904 [9:02:43<16:36:06, 1.36s/it] 29%|██▉ | 18023/61904 [9:02:44<17:09:16, 1.41s/it] 29%|██▉ | 18024/61904 [9:02:46<17:05:23, 1.40s/it] 29%|██▉ | 18025/61904 [9:02:47<17:37:02, 1.45s/it] 29%|██▉ | 18026/61904 [9:02:49<17:07:51, 1.41s/it] 29%|██▉ | 18027/61904 [9:02:50<16:24:02, 1.35s/it] 29%|██▉ | 18028/61904 [9:02:51<16:28:01, 1.35s/it] 29%|██▉ | 18029/61904 [9:02:52<16:05:44, 1.32s/it] 29%|██▉ | 18030/61904 [9:02:54<16:38:30, 1.37s/it] 29%|██▉ | 18031/61904 [9:02:55<16:34:59, 1.36s/it] 29%|██▉ | 18032/61904 [9:02:57<16:45:49, 1.38s/it] 29%|██▉ | 18033/61904 [9:02:58<16:37:53, 1.36s/it] 29%|██▉ | 18034/61904 [9:02:59<16:58:10, 1.39s/it] 29%|██▉ | 18035/61904 [9:03:01<16:38:53, 1.37s/it] 29%|██▉ | 18036/61904 [9:03:02<16:19:07, 1.34s/it] 29%|██▉ | 18037/61904 [9:03:03<16:38:39, 1.37s/it] 29%|██▉ | 18038/61904 [9:03:05<16:08:46, 1.33s/it] 29%|██▉ | 18039/61904 [9:03:06<16:44:14, 1.37s/it] 29%|██▉ | 18040/61904 [9:03:07<16:33:57, 1.36s/it] {'loss': 2.6856, 'learning_rate': 1.7108777388824062e-07, 'epoch': 4.66} + 29%|██▉ | 18040/61904 [9:03:07<16:33:57, 1.36s/it] 29%|██▉ | 18041/61904 [9:03:09<16:12:26, 1.33s/it] 29%|██▉ | 18042/61904 [9:03:10<16:03:57, 1.32s/it] 29%|██▉ | 18043/61904 [9:03:11<16:07:29, 1.32s/it] 29%|██▉ | 18044/61904 [9:03:13<16:47:49, 1.38s/it] 29%|██▉ | 18045/61904 [9:03:14<17:15:48, 1.42s/it] 29%|██▉ | 18046/61904 [9:03:16<16:57:37, 1.39s/it] 29%|██▉ | 18047/61904 [9:03:17<16:51:38, 1.38s/it] 29%|██▉ | 18048/61904 [9:03:18<16:58:51, 1.39s/it] 29%|██▉ | 18049/61904 [9:03:20<17:30:57, 1.44s/it] 29%|██▉ | 18050/61904 [9:03:21<17:30:08, 1.44s/it] 29%|██▉ | 18051/61904 [9:03:23<17:08:15, 1.41s/it] 29%|██▉ | 18052/61904 [9:03:24<16:51:28, 1.38s/it] 29%|██▉ | 18053/61904 [9:03:25<16:47:38, 1.38s/it] 29%|██▉ | 18054/61904 [9:03:27<16:51:29, 1.38s/it] 29%|██▉ | 18055/61904 [9:03:28<16:31:20, 1.36s/it] 29%|██▉ | 18056/61904 [9:03:30<16:40:46, 1.37s/it] 29%|██▉ | 18057/61904 [9:03:31<16:27:25, 1.35s/it] 29%|██▉ | 18058/61904 [9:03:32<16:29:58, 1.35s/it] 29%|██▉ | 18059/61904 [9:03:34<16:21:36, 1.34s/it] 29%|██▉ | 18060/61904 [9:03:35<15:54:02, 1.31s/it] {'loss': 2.7362, 'learning_rate': 1.7105536107869829e-07, 'epoch': 4.67} + 29%|██▉ | 18060/61904 [9:03:35<15:54:02, 1.31s/it] 29%|██▉ | 18061/61904 [9:03:36<16:46:11, 1.38s/it] 29%|██▉ | 18062/61904 [9:03:38<16:37:40, 1.37s/it] 29%|██▉ | 18063/61904 [9:03:39<16:12:02, 1.33s/it] 29%|██▉ | 18064/61904 [9:03:40<15:51:06, 1.30s/it] 29%|██▉ | 18065/61904 [9:03:41<15:46:00, 1.29s/it] 29%|██▉ | 18066/61904 [9:03:43<15:52:49, 1.30s/it] 29%|██▉ | 18067/61904 [9:03:44<16:34:13, 1.36s/it] 29%|██▉ | 18068/61904 [9:03:46<16:40:14, 1.37s/it] 29%|██▉ | 18069/61904 [9:03:47<16:37:52, 1.37s/it] 29%|██▉ | 18070/61904 [9:03:48<16:29:42, 1.35s/it] 29%|██▉ | 18071/61904 [9:03:50<16:36:48, 1.36s/it] 29%|██▉ | 18072/61904 [9:03:51<16:42:58, 1.37s/it] 29%|██▉ | 18073/61904 [9:03:53<17:03:23, 1.40s/it] 29%|██▉ | 18074/61904 [9:03:54<17:06:45, 1.41s/it] 29%|██▉ | 18075/61904 [9:03:55<17:05:50, 1.40s/it] 29%|██▉ | 18076/61904 [9:03:57<17:02:24, 1.40s/it] 29%|██▉ | 18077/61904 [9:03:58<17:02:26, 1.40s/it] 29%|██▉ | 18078/61904 [9:04:00<17:14:23, 1.42s/it] 29%|██▉ | 18079/61904 [9:04:01<17:26:46, 1.43s/it] 29%|██▉ | 18080/61904 [9:04:02<16:47:06, 1.38s/it] {'loss': 2.6738, 'learning_rate': 1.7102294826915595e-07, 'epoch': 4.67} + 29%|██▉ | 18080/61904 [9:04:02<16:47:06, 1.38s/it] 29%|██▉ | 18081/61904 [9:04:04<16:42:10, 1.37s/it] 29%|██▉ | 18082/61904 [9:04:05<16:52:30, 1.39s/it] 29%|██▉ | 18083/61904 [9:04:06<16:36:29, 1.36s/it] 29%|██▉ | 18084/61904 [9:04:08<16:59:02, 1.40s/it] 29%|██▉ | 18085/61904 [9:04:09<17:11:22, 1.41s/it] 29%|██▉ | 18086/61904 [9:04:11<17:05:21, 1.40s/it] 29%|██▉ | 18087/61904 [9:04:12<16:31:44, 1.36s/it] 29%|██▉ | 18088/61904 [9:04:13<16:22:08, 1.34s/it] 29%|██▉ | 18089/61904 [9:04:15<16:30:03, 1.36s/it] 29%|██▉ | 18090/61904 [9:04:16<16:22:40, 1.35s/it] 29%|██▉ | 18091/61904 [9:04:17<16:20:59, 1.34s/it] 29%|██▉ | 18092/61904 [9:04:19<16:38:36, 1.37s/it] 29%|██▉ | 18093/61904 [9:04:20<16:17:44, 1.34s/it] 29%|██▉ | 18094/61904 [9:04:21<16:26:31, 1.35s/it] 29%|██▉ | 18095/61904 [9:04:23<16:05:04, 1.32s/it] 29%|██▉ | 18096/61904 [9:04:24<16:06:16, 1.32s/it] 29%|██▉ | 18097/61904 [9:04:25<16:12:37, 1.33s/it] 29%|██▉ | 18098/61904 [9:04:27<16:23:16, 1.35s/it] 29%|██▉ | 18099/61904 [9:04:28<16:12:13, 1.33s/it] 29%|██▉ | 18100/61904 [9:04:29<16:17:34, 1.34s/it] {'loss': 2.7141, 'learning_rate': 1.7099053545961364e-07, 'epoch': 4.68} + 29%|██▉ | 18100/61904 [9:04:29<16:17:34, 1.34s/it] 29%|██▉ | 18101/61904 [9:04:31<16:24:55, 1.35s/it] 29%|██▉ | 18102/61904 [9:04:32<16:25:15, 1.35s/it] 29%|██▉ | 18103/61904 [9:04:33<16:23:27, 1.35s/it] 29%|██▉ | 18104/61904 [9:04:35<16:31:12, 1.36s/it] 29%|██▉ | 18105/61904 [9:04:36<16:15:51, 1.34s/it] 29%|██▉ | 18106/61904 [9:04:38<17:00:15, 1.40s/it] 29%|██▉ | 18107/61904 [9:04:39<16:41:57, 1.37s/it] 29%|██▉ | 18108/61904 [9:04:40<17:02:57, 1.40s/it] 29%|██▉ | 18109/61904 [9:04:42<16:42:09, 1.37s/it] 29%|██▉ | 18110/61904 [9:04:43<16:32:14, 1.36s/it] 29%|██▉ | 18111/61904 [9:04:44<16:31:35, 1.36s/it] 29%|██▉ | 18112/61904 [9:04:46<16:28:26, 1.35s/it] 29%|██▉ | 18113/61904 [9:04:47<16:07:59, 1.33s/it] 29%|██▉ | 18114/61904 [9:04:49<16:58:51, 1.40s/it] 29%|██▉ | 18115/61904 [9:04:50<16:36:19, 1.37s/it] 29%|██▉ | 18116/61904 [9:04:51<17:22:46, 1.43s/it] 29%|██▉ | 18117/61904 [9:04:53<17:29:27, 1.44s/it] 29%|██▉ | 18118/61904 [9:04:54<17:01:52, 1.40s/it] 29%|██▉ | 18119/61904 [9:04:56<16:48:36, 1.38s/it] 29%|██▉ | 18120/61904 [9:04:57<17:07:21, 1.41s/it] {'loss': 2.6066, 'learning_rate': 1.709581226500713e-07, 'epoch': 4.68} + 29%|██▉ | 18120/61904 [9:04:57<17:07:21, 1.41s/it] 29%|██▉ | 18121/61904 [9:04:58<16:52:53, 1.39s/it] 29%|██▉ | 18122/61904 [9:05:00<16:58:08, 1.40s/it] 29%|██▉ | 18123/61904 [9:05:01<17:03:36, 1.40s/it] 29%|██▉ | 18124/61904 [9:05:03<16:54:15, 1.39s/it] 29%|██▉ | 18125/61904 [9:05:04<17:09:26, 1.41s/it] 29%|██▉ | 18126/61904 [9:05:05<17:05:16, 1.41s/it] 29%|██▉ | 18127/61904 [9:05:07<16:53:39, 1.39s/it] 29%|██▉ | 18128/61904 [9:05:08<16:33:23, 1.36s/it] 29%|██▉ | 18129/61904 [9:05:09<16:28:44, 1.36s/it] 29%|██▉ | 18130/61904 [9:05:11<16:15:42, 1.34s/it] 29%|██▉ | 18131/61904 [9:05:12<16:23:15, 1.35s/it] 29%|██▉ | 18132/61904 [9:05:13<16:31:56, 1.36s/it] 29%|██▉ | 18133/61904 [9:05:15<16:20:51, 1.34s/it] 29%|██▉ | 18134/61904 [9:05:16<16:10:26, 1.33s/it] 29%|██▉ | 18135/61904 [9:05:17<15:59:28, 1.32s/it] 29%|██▉ | 18136/61904 [9:05:19<16:03:56, 1.32s/it] 29%|██▉ | 18137/61904 [9:05:20<15:49:59, 1.30s/it] 29%|██▉ | 18138/61904 [9:05:21<15:46:58, 1.30s/it] 29%|██▉ | 18139/61904 [9:05:23<15:57:44, 1.31s/it] 29%|██▉ | 18140/61904 [9:05:24<16:04:07, 1.32s/it] {'loss': 2.6816, 'learning_rate': 1.7092570984052896e-07, 'epoch': 4.69} + 29%|██▉ | 18140/61904 [9:05:24<16:04:07, 1.32s/it] 29%|██▉ | 18141/61904 [9:05:25<16:29:28, 1.36s/it] 29%|██▉ | 18142/61904 [9:05:27<16:38:06, 1.37s/it] 29%|██▉ | 18143/61904 [9:05:28<16:19:05, 1.34s/it] 29%|██▉ | 18144/61904 [9:05:29<16:04:15, 1.32s/it] 29%|██▉ | 18145/61904 [9:05:31<16:52:48, 1.39s/it] 29%|██▉ | 18146/61904 [9:05:32<17:02:41, 1.40s/it] 29%|██▉ | 18147/61904 [9:05:33<16:23:24, 1.35s/it] 29%|██▉ | 18148/61904 [9:05:35<16:21:14, 1.35s/it] 29%|██▉ | 18149/61904 [9:05:36<16:35:29, 1.37s/it] 29%|██▉ | 18150/61904 [9:05:38<16:52:02, 1.39s/it] 29%|██▉ | 18151/61904 [9:05:39<16:57:31, 1.40s/it] 29%|██▉ | 18152/61904 [9:05:40<16:53:14, 1.39s/it] 29%|██▉ | 18153/61904 [9:05:42<17:23:23, 1.43s/it] 29%|██▉ | 18154/61904 [9:05:44<18:13:23, 1.50s/it] 29%|██▉ | 18155/61904 [9:05:45<17:50:57, 1.47s/it] 29%|██▉ | 18156/61904 [9:05:46<16:44:31, 1.38s/it] 29%|██▉ | 18157/61904 [9:05:48<16:47:10, 1.38s/it] 29%|██▉ | 18158/61904 [9:05:49<16:49:18, 1.38s/it] 29%|██▉ | 18159/61904 [9:05:50<16:25:29, 1.35s/it] 29%|██▉ | 18160/61904 [9:05:52<16:25:05, 1.35s/it] {'loss': 2.6816, 'learning_rate': 1.7089329703098665e-07, 'epoch': 4.69} + 29%|██▉ | 18160/61904 [9:05:52<16:25:05, 1.35s/it] 29%|██▉ | 18161/61904 [9:05:53<16:03:53, 1.32s/it] 29%|██▉ | 18162/61904 [9:05:54<16:10:39, 1.33s/it] 29%|██▉ | 18163/61904 [9:05:56<16:56:07, 1.39s/it] 29%|██▉ | 18164/61904 [9:05:57<16:57:19, 1.40s/it] 29%|██▉ | 18165/61904 [9:05:59<16:53:21, 1.39s/it] 29%|██▉ | 18166/61904 [9:06:00<16:47:39, 1.38s/it] 29%|██▉ | 18167/61904 [9:06:01<16:58:16, 1.40s/it] 29%|██▉ | 18168/61904 [9:06:03<17:27:41, 1.44s/it] 29%|██▉ | 18169/61904 [9:06:04<17:13:51, 1.42s/it] 29%|██▉ | 18170/61904 [9:06:06<16:45:35, 1.38s/it] 29%|██▉ | 18171/61904 [9:06:07<16:44:07, 1.38s/it] 29%|██▉ | 18172/61904 [9:06:08<16:37:01, 1.37s/it] 29%|██▉ | 18173/61904 [9:06:10<17:11:24, 1.42s/it] 29%|██▉ | 18174/61904 [9:06:11<16:37:16, 1.37s/it] 29%|██▉ | 18175/61904 [9:06:12<16:21:34, 1.35s/it] 29%|██▉ | 18176/61904 [9:06:14<16:13:19, 1.34s/it] 29%|██▉ | 18177/61904 [9:06:15<16:55:45, 1.39s/it] 29%|██▉ | 18178/61904 [9:06:16<16:31:12, 1.36s/it] 29%|██▉ | 18179/61904 [9:06:18<16:20:06, 1.34s/it] 29%|██▉ | 18180/61904 [9:06:19<16:06:48, 1.33s/it] {'loss': 2.7157, 'learning_rate': 1.708608842214443e-07, 'epoch': 4.7} + 29%|██▉ | 18180/61904 [9:06:19<16:06:48, 1.33s/it] 29%|██▉ | 18181/61904 [9:06:20<16:06:13, 1.33s/it] 29%|██▉ | 18182/61904 [9:06:22<16:07:35, 1.33s/it] 29%|██▉ | 18183/61904 [9:06:23<16:02:47, 1.32s/it] 29%|██▉ | 18184/61904 [9:06:24<15:48:21, 1.30s/it] 29%|██▉ | 18185/61904 [9:06:26<16:10:30, 1.33s/it] 29%|██▉ | 18186/61904 [9:06:27<16:41:24, 1.37s/it] 29%|██▉ | 18187/61904 [9:06:29<16:44:34, 1.38s/it] 29%|██▉ | 18188/61904 [9:06:30<16:37:48, 1.37s/it] 29%|██▉ | 18189/61904 [9:06:31<16:18:39, 1.34s/it] 29%|██▉ | 18190/61904 [9:06:33<16:27:06, 1.35s/it] 29%|██▉ | 18191/61904 [9:06:34<17:05:45, 1.41s/it] 29%|██▉ | 18192/61904 [9:06:36<17:22:15, 1.43s/it] 29%|██▉ | 18193/61904 [9:06:37<17:05:23, 1.41s/it] 29%|██▉ | 18194/61904 [9:06:38<17:01:31, 1.40s/it] 29%|██▉ | 18195/61904 [9:06:40<16:36:15, 1.37s/it] 29%|██▉ | 18196/61904 [9:06:41<16:28:42, 1.36s/it] 29%|██▉ | 18197/61904 [9:06:42<16:33:48, 1.36s/it] 29%|██▉ | 18198/61904 [9:06:44<16:22:28, 1.35s/it] 29%|██▉ | 18199/61904 [9:06:45<15:59:26, 1.32s/it] 29%|██▉ | 18200/61904 [9:06:46<16:23:12, 1.35s/it] {'loss': 2.7059, 'learning_rate': 1.7082847141190197e-07, 'epoch': 4.7} + 29%|██▉ | 18200/61904 [9:06:46<16:23:12, 1.35s/it] 29%|██▉ | 18201/61904 [9:06:48<16:04:29, 1.32s/it] 29%|██▉ | 18202/61904 [9:06:49<16:03:45, 1.32s/it] 29%|██▉ | 18203/61904 [9:06:50<16:29:29, 1.36s/it] 29%|██▉ | 18204/61904 [9:06:52<16:13:22, 1.34s/it] 29%|██▉ | 18205/61904 [9:06:53<16:57:49, 1.40s/it] 29%|██▉ | 18206/61904 [9:06:55<18:44:39, 1.54s/it] 29%|██▉ | 18207/61904 [9:06:56<17:37:14, 1.45s/it] 29%|██▉ | 18208/61904 [9:06:58<17:22:02, 1.43s/it] 29%|██▉ | 18209/61904 [9:06:59<16:35:45, 1.37s/it] 29%|██▉ | 18210/61904 [9:07:00<17:10:23, 1.41s/it] 29%|██▉ | 18211/61904 [9:07:02<17:06:01, 1.41s/it] 29%|██▉ | 18212/61904 [9:07:03<17:10:46, 1.42s/it] 29%|██▉ | 18213/61904 [9:07:05<16:48:22, 1.38s/it] 29%|██▉ | 18214/61904 [9:07:06<16:40:38, 1.37s/it] 29%|██▉ | 18215/61904 [9:07:07<16:58:15, 1.40s/it] 29%|██▉ | 18216/61904 [9:07:09<16:42:52, 1.38s/it] 29%|██▉ | 18217/61904 [9:07:10<16:39:39, 1.37s/it] 29%|██▉ | 18218/61904 [9:07:11<16:19:15, 1.34s/it] 29%|██▉ | 18219/61904 [9:07:13<16:24:08, 1.35s/it] 29%|██▉ | 18220/61904 [9:07:14<16:39:15, 1.37s/it] {'loss': 2.6542, 'learning_rate': 1.7079605860235963e-07, 'epoch': 4.71} + 29%|██▉ | 18220/61904 [9:07:14<16:39:15, 1.37s/it] 29%|██▉ | 18221/61904 [9:07:15<16:36:26, 1.37s/it] 29%|██▉ | 18222/61904 [9:07:17<16:06:37, 1.33s/it] 29%|██▉ | 18223/61904 [9:07:18<16:21:24, 1.35s/it] 29%|██▉ | 18224/61904 [9:07:20<16:52:28, 1.39s/it] 29%|██▉ | 18225/61904 [9:07:21<16:46:11, 1.38s/it] 29%|██▉ | 18226/61904 [9:07:22<16:12:00, 1.34s/it] 29%|██▉ | 18227/61904 [9:07:23<16:02:06, 1.32s/it] 29%|██▉ | 18228/61904 [9:07:25<15:52:41, 1.31s/it] 29%|██▉ | 18229/61904 [9:07:26<15:42:30, 1.29s/it] 29%|██▉ | 18230/61904 [9:07:27<15:37:37, 1.29s/it] 29%|██▉ | 18231/61904 [9:07:29<16:27:00, 1.36s/it] 29%|██▉ | 18232/61904 [9:07:30<16:24:16, 1.35s/it] 29%|██▉ | 18233/61904 [9:07:32<16:53:43, 1.39s/it] 29%|██▉ | 18234/61904 [9:07:33<16:31:55, 1.36s/it] 29%|██▉ | 18235/61904 [9:07:35<17:34:06, 1.45s/it] 29%|██▉ | 18236/61904 [9:07:36<17:32:40, 1.45s/it] 29%|██▉ | 18237/61904 [9:07:37<16:58:10, 1.40s/it] 29%|██▉ | 18238/61904 [9:07:39<16:42:33, 1.38s/it] 29%|██▉ | 18239/61904 [9:07:40<16:57:39, 1.40s/it] 29%|██▉ | 18240/61904 [9:07:41<16:22:29, 1.35s/it] {'loss': 2.6852, 'learning_rate': 1.707636457928173e-07, 'epoch': 4.71} + 29%|██▉ | 18240/61904 [9:07:41<16:22:29, 1.35s/it] 29%|██▉ | 18241/61904 [9:07:43<16:35:51, 1.37s/it] 29%|██▉ | 18242/61904 [9:07:44<17:09:42, 1.42s/it] 29%|██▉ | 18243/61904 [9:07:46<17:04:19, 1.41s/it] 29%|██▉ | 18244/61904 [9:07:47<16:40:14, 1.37s/it] 29%|██▉ | 18245/61904 [9:07:48<16:28:57, 1.36s/it] 29%|██▉ | 18246/61904 [9:07:50<16:36:20, 1.37s/it] 29%|██▉ | 18247/61904 [9:07:51<16:22:55, 1.35s/it] 29%|██▉ | 18248/61904 [9:07:52<16:12:28, 1.34s/it] 29%|██▉ | 18249/61904 [9:07:53<15:47:22, 1.30s/it] 29%|██▉ | 18250/61904 [9:07:55<16:35:28, 1.37s/it] 29%|██▉ | 18251/61904 [9:07:56<16:36:47, 1.37s/it] 29%|██▉ | 18252/61904 [9:07:58<16:11:31, 1.34s/it] 29%|██▉ | 18253/61904 [9:07:59<15:42:12, 1.30s/it] 29%|██▉ | 18254/61904 [9:08:00<15:42:07, 1.30s/it] 29%|██▉ | 18255/61904 [9:08:01<15:52:45, 1.31s/it] 29%|██▉ | 18256/61904 [9:08:03<16:25:04, 1.35s/it] 29%|██▉ | 18257/61904 [9:08:04<16:23:09, 1.35s/it] 29%|██▉ | 18258/61904 [9:08:06<16:04:26, 1.33s/it] 29%|██▉ | 18259/61904 [9:08:07<16:09:32, 1.33s/it] 29%|██▉ | 18260/61904 [9:08:08<15:59:25, 1.32s/it] {'loss': 2.7449, 'learning_rate': 1.7073123298327498e-07, 'epoch': 4.72} + 29%|██▉ | 18260/61904 [9:08:08<15:59:25, 1.32s/it] 29%|██▉ | 18261/61904 [9:08:09<15:51:05, 1.31s/it] 30%|██▉ | 18262/61904 [9:08:11<15:50:28, 1.31s/it] 30%|██▉ | 18263/61904 [9:08:12<16:54:52, 1.40s/it] 30%|██▉ | 18264/61904 [9:08:14<16:19:20, 1.35s/it] 30%|██▉ | 18265/61904 [9:08:15<16:29:51, 1.36s/it] 30%|██▉ | 18266/61904 [9:08:16<17:00:02, 1.40s/it] 30%|██▉ | 18267/61904 [9:08:18<17:42:58, 1.46s/it] 30%|██▉ | 18268/61904 [9:08:19<17:12:00, 1.42s/it] 30%|██▉ | 18269/61904 [9:08:21<17:29:32, 1.44s/it] 30%|██▉ | 18270/61904 [9:08:22<17:55:04, 1.48s/it] 30%|██▉ | 18271/61904 [9:08:24<17:27:03, 1.44s/it] 30%|██▉ | 18272/61904 [9:08:25<17:18:04, 1.43s/it] 30%|██▉ | 18273/61904 [9:08:27<17:43:57, 1.46s/it] 30%|██▉ | 18274/61904 [9:08:28<17:12:39, 1.42s/it] 30%|██▉ | 18275/61904 [9:08:29<16:49:24, 1.39s/it] 30%|██▉ | 18276/61904 [9:08:31<16:39:38, 1.37s/it] 30%|██▉ | 18277/61904 [9:08:32<16:45:00, 1.38s/it] 30%|██▉ | 18278/61904 [9:08:33<16:27:16, 1.36s/it] 30%|██▉ | 18279/61904 [9:08:35<16:08:55, 1.33s/it] 30%|██▉ | 18280/61904 [9:08:36<16:22:51, 1.35s/it] {'loss': 2.6817, 'learning_rate': 1.7069882017373265e-07, 'epoch': 4.72} + 30%|██▉ | 18280/61904 [9:08:36<16:22:51, 1.35s/it] 30%|██▉ | 18281/61904 [9:08:38<16:49:56, 1.39s/it] 30%|██▉ | 18282/61904 [9:08:39<17:00:29, 1.40s/it] 30%|██▉ | 18283/61904 [9:08:40<16:45:59, 1.38s/it] 30%|██▉ | 18284/61904 [9:08:42<16:22:49, 1.35s/it] 30%|██▉ | 18285/61904 [9:08:43<16:05:47, 1.33s/it] 30%|██▉ | 18286/61904 [9:08:44<16:29:09, 1.36s/it] 30%|██▉ | 18287/61904 [9:08:46<16:50:00, 1.39s/it] 30%|██▉ | 18288/61904 [9:08:47<16:48:12, 1.39s/it] 30%|██▉ | 18289/61904 [9:08:49<17:00:53, 1.40s/it] 30%|██▉ | 18290/61904 [9:08:50<17:17:53, 1.43s/it] 30%|██▉ | 18291/61904 [9:08:52<17:20:17, 1.43s/it] 30%|██▉ | 18292/61904 [9:08:53<17:29:46, 1.44s/it] 30%|██▉ | 18293/61904 [9:08:55<17:44:35, 1.46s/it] 30%|██▉ | 18294/61904 [9:08:56<17:43:19, 1.46s/it] 30%|██▉ | 18295/61904 [9:08:57<17:31:16, 1.45s/it] 30%|██▉ | 18296/61904 [9:08:59<17:00:00, 1.40s/it] 30%|██▉ | 18297/61904 [9:09:00<16:33:14, 1.37s/it] 30%|██▉ | 18298/61904 [9:09:01<16:16:52, 1.34s/it] 30%|██▉ | 18299/61904 [9:09:03<16:08:12, 1.33s/it] 30%|██▉ | 18300/61904 [9:09:04<16:06:22, 1.33s/it] {'loss': 2.6768, 'learning_rate': 1.706664073641903e-07, 'epoch': 4.73} + 30%|██▉ | 18300/61904 [9:09:04<16:06:22, 1.33s/it] 30%|██▉ | 18301/61904 [9:09:05<16:54:41, 1.40s/it] 30%|██▉ | 18302/61904 [9:09:07<17:05:11, 1.41s/it] 30%|██▉ | 18303/61904 [9:09:08<16:47:31, 1.39s/it] 30%|██▉ | 18304/61904 [9:09:10<17:01:24, 1.41s/it] 30%|██▉ | 18305/61904 [9:09:11<16:56:15, 1.40s/it] 30%|██▉ | 18306/61904 [9:09:13<17:47:19, 1.47s/it] 30%|██▉ | 18307/61904 [9:09:14<17:42:00, 1.46s/it] 30%|██▉ | 18308/61904 [9:09:15<17:11:59, 1.42s/it] 30%|██▉ | 18309/61904 [9:09:17<16:55:19, 1.40s/it] 30%|██▉ | 18310/61904 [9:09:18<17:30:50, 1.45s/it] 30%|██▉ | 18311/61904 [9:09:20<17:10:15, 1.42s/it] 30%|██▉ | 18312/61904 [9:09:21<17:08:09, 1.42s/it] 30%|██▉ | 18313/61904 [9:09:22<16:50:14, 1.39s/it] 30%|██▉ | 18314/61904 [9:09:24<16:55:23, 1.40s/it] 30%|██▉ | 18315/61904 [9:09:25<16:52:26, 1.39s/it] 30%|██▉ | 18316/61904 [9:09:27<16:52:03, 1.39s/it] 30%|██▉ | 18317/61904 [9:09:28<16:36:31, 1.37s/it] 30%|██▉ | 18318/61904 [9:09:29<16:28:21, 1.36s/it] 30%|██▉ | 18319/61904 [9:09:31<16:15:02, 1.34s/it] 30%|██▉ | 18320/61904 [9:09:32<17:29:21, 1.44s/it] {'loss': 2.745, 'learning_rate': 1.70633994554648e-07, 'epoch': 4.73} + 30%|██▉ | 18320/61904 [9:09:32<17:29:21, 1.44s/it] 30%|██▉ | 18321/61904 [9:09:34<16:51:22, 1.39s/it] 30%|██▉ | 18322/61904 [9:09:35<17:23:52, 1.44s/it] 30%|██▉ | 18323/61904 [9:09:36<17:12:10, 1.42s/it] 30%|██▉ | 18324/61904 [9:09:38<16:56:56, 1.40s/it] 30%|██▉ | 18325/61904 [9:09:39<16:34:14, 1.37s/it] 30%|██▉ | 18326/61904 [9:09:41<16:38:33, 1.37s/it] 30%|██▉ | 18327/61904 [9:09:42<16:37:53, 1.37s/it] 30%|██▉ | 18328/61904 [9:09:43<16:24:28, 1.36s/it] 30%|██▉ | 18329/61904 [9:09:45<16:31:24, 1.37s/it] 30%|██▉ | 18330/61904 [9:09:46<16:49:29, 1.39s/it] 30%|██▉ | 18331/61904 [9:09:47<16:34:01, 1.37s/it] 30%|██▉ | 18332/61904 [9:09:49<16:15:10, 1.34s/it] 30%|██▉ | 18333/61904 [9:09:50<16:43:20, 1.38s/it] 30%|██▉ | 18334/61904 [9:09:51<16:33:41, 1.37s/it] 30%|██▉ | 18335/61904 [9:09:53<16:54:53, 1.40s/it] 30%|██▉ | 18336/61904 [9:09:54<16:36:33, 1.37s/it] 30%|██▉ | 18337/61904 [9:09:56<16:41:55, 1.38s/it] 30%|██▉ | 18338/61904 [9:09:57<17:06:49, 1.41s/it] 30%|██▉ | 18339/61904 [9:09:58<16:57:09, 1.40s/it] 30%|██▉ | 18340/61904 [9:10:00<16:45:27, 1.38s/it] {'loss': 2.6284, 'learning_rate': 1.7060158174510566e-07, 'epoch': 4.74} + 30%|██▉ | 18340/61904 [9:10:00<16:45:27, 1.38s/it] 30%|██▉ | 18341/61904 [9:10:01<17:10:23, 1.42s/it] 30%|██▉ | 18342/61904 [9:10:03<16:42:10, 1.38s/it] 30%|██▉ | 18343/61904 [9:10:04<16:57:17, 1.40s/it] 30%|██▉ | 18344/61904 [9:10:06<17:16:48, 1.43s/it] 30%|██▉ | 18345/61904 [9:10:07<17:06:26, 1.41s/it] 30%|██▉ | 18346/61904 [9:10:09<18:13:48, 1.51s/it] 30%|██▉ | 18347/61904 [9:10:10<17:38:31, 1.46s/it] 30%|██▉ | 18348/61904 [9:10:11<17:04:21, 1.41s/it] 30%|██▉ | 18349/61904 [9:10:13<17:34:56, 1.45s/it] 30%|██▉ | 18350/61904 [9:10:14<17:32:55, 1.45s/it] 30%|██▉ | 18351/61904 [9:10:16<17:21:02, 1.43s/it] 30%|██▉ | 18352/61904 [9:10:17<16:20:30, 1.35s/it] 30%|██▉ | 18353/61904 [9:10:18<15:50:48, 1.31s/it] 30%|██▉ | 18354/61904 [9:10:19<16:14:43, 1.34s/it] 30%|██▉ | 18355/61904 [9:10:21<16:47:28, 1.39s/it] 30%|██▉ | 18356/61904 [9:10:22<16:40:29, 1.38s/it] 30%|██▉ | 18357/61904 [9:10:24<16:12:38, 1.34s/it] 30%|██▉ | 18358/61904 [9:10:25<16:29:15, 1.36s/it] 30%|██▉ | 18359/61904 [9:10:26<16:32:24, 1.37s/it] 30%|██▉ | 18360/61904 [9:10:28<16:24:55, 1.36s/it] {'loss': 2.6909, 'learning_rate': 1.7056916893556332e-07, 'epoch': 4.74} + 30%|██▉ | 18360/61904 [9:10:28<16:24:55, 1.36s/it] 30%|██▉ | 18361/61904 [9:10:29<16:22:58, 1.35s/it] 30%|██▉ | 18362/61904 [9:10:30<16:26:12, 1.36s/it] 30%|██▉ | 18363/61904 [9:10:32<16:16:04, 1.35s/it] 30%|██▉ | 18364/61904 [9:10:33<16:19:46, 1.35s/it] 30%|██▉ | 18365/61904 [9:10:34<16:03:37, 1.33s/it] 30%|██▉ | 18366/61904 [9:10:36<16:10:02, 1.34s/it] 30%|██▉ | 18367/61904 [9:10:37<16:13:36, 1.34s/it] 30%|██▉ | 18368/61904 [9:10:38<16:15:51, 1.34s/it] 30%|██▉ | 18369/61904 [9:10:40<16:01:23, 1.33s/it] 30%|██▉ | 18370/61904 [9:10:41<16:11:19, 1.34s/it] 30%|██▉ | 18371/61904 [9:10:43<16:44:03, 1.38s/it] 30%|██▉ | 18372/61904 [9:10:44<16:46:03, 1.39s/it] 30%|██▉ | 18373/61904 [9:10:45<16:40:12, 1.38s/it] 30%|██▉ | 18374/61904 [9:10:47<16:43:30, 1.38s/it] 30%|██▉ | 18375/61904 [9:10:48<16:40:35, 1.38s/it] 30%|██▉ | 18376/61904 [9:10:50<17:05:37, 1.41s/it] 30%|██▉ | 18377/61904 [9:10:51<16:47:33, 1.39s/it] 30%|██▉ | 18378/61904 [9:10:52<16:21:40, 1.35s/it] 30%|██▉ | 18379/61904 [9:10:54<16:55:30, 1.40s/it] 30%|██▉ | 18380/61904 [9:10:55<17:00:25, 1.41s/it] {'loss': 2.6719, 'learning_rate': 1.70536756126021e-07, 'epoch': 4.75} + 30%|██▉ | 18380/61904 [9:10:55<17:00:25, 1.41s/it] 30%|██▉ | 18381/61904 [9:10:57<17:17:48, 1.43s/it] 30%|██▉ | 18382/61904 [9:10:58<16:52:10, 1.40s/it] 30%|██▉ | 18383/61904 [9:10:59<17:08:01, 1.42s/it] 30%|██▉ | 18384/61904 [9:11:01<16:38:29, 1.38s/it] 30%|██▉ | 18385/61904 [9:11:02<16:14:05, 1.34s/it] 30%|██▉ | 18386/61904 [9:11:03<15:59:42, 1.32s/it] 30%|██▉ | 18387/61904 [9:11:05<16:39:57, 1.38s/it] 30%|██▉ | 18388/61904 [9:11:06<16:38:36, 1.38s/it] 30%|██▉ | 18389/61904 [9:11:08<17:22:16, 1.44s/it] 30%|██▉ | 18390/61904 [9:11:09<16:28:00, 1.36s/it] 30%|██▉ | 18391/61904 [9:11:10<16:11:52, 1.34s/it] 30%|██▉ | 18392/61904 [9:11:12<16:18:21, 1.35s/it] 30%|██▉ | 18393/61904 [9:11:13<16:04:00, 1.33s/it] 30%|██▉ | 18394/61904 [9:11:14<16:12:51, 1.34s/it] 30%|██▉ | 18395/61904 [9:11:16<16:10:29, 1.34s/it] 30%|██▉ | 18396/61904 [9:11:17<16:32:56, 1.37s/it] 30%|██▉ | 18397/61904 [9:11:18<16:15:27, 1.35s/it] 30%|██▉ | 18398/61904 [9:11:20<16:28:31, 1.36s/it] 30%|█���▉ | 18399/61904 [9:11:21<16:53:46, 1.40s/it] 30%|██▉ | 18400/61904 [9:11:22<16:23:54, 1.36s/it] {'loss': 2.6787, 'learning_rate': 1.7050434331647867e-07, 'epoch': 4.76} + 30%|██▉ | 18400/61904 [9:11:22<16:23:54, 1.36s/it] 30%|██▉ | 18401/61904 [9:11:24<16:28:01, 1.36s/it] 30%|██▉ | 18402/61904 [9:11:25<16:27:28, 1.36s/it] 30%|██▉ | 18403/61904 [9:11:26<16:24:30, 1.36s/it] 30%|██▉ | 18404/61904 [9:11:28<16:29:10, 1.36s/it] 30%|██▉ | 18405/61904 [9:11:29<16:19:17, 1.35s/it] 30%|██▉ | 18406/61904 [9:11:30<15:52:22, 1.31s/it] 30%|██▉ | 18407/61904 [9:11:32<15:53:48, 1.32s/it] 30%|██▉ | 18408/61904 [9:11:33<16:06:19, 1.33s/it] 30%|██▉ | 18409/61904 [9:11:35<16:40:39, 1.38s/it] 30%|██▉ | 18410/61904 [9:11:36<16:35:26, 1.37s/it] 30%|██▉ | 18411/61904 [9:11:37<16:38:35, 1.38s/it] 30%|██▉ | 18412/61904 [9:11:39<16:26:23, 1.36s/it] 30%|██▉ | 18413/61904 [9:11:40<16:46:27, 1.39s/it] 30%|██▉ | 18414/61904 [9:11:41<16:42:27, 1.38s/it] 30%|██▉ | 18415/61904 [9:11:43<17:07:14, 1.42s/it] 30%|██▉ | 18416/61904 [9:11:44<16:51:18, 1.40s/it] 30%|██▉ | 18417/61904 [9:11:46<17:08:55, 1.42s/it] 30%|██▉ | 18418/61904 [9:11:47<17:35:35, 1.46s/it] 30%|██▉ | 18419/61904 [9:11:49<17:02:32, 1.41s/it] 30%|██▉ | 18420/61904 [9:11:50<17:05:36, 1.42s/it] {'loss': 2.6998, 'learning_rate': 1.7047193050693633e-07, 'epoch': 4.76} + 30%|██▉ | 18420/61904 [9:11:50<17:05:36, 1.42s/it] 30%|██▉ | 18421/61904 [9:11:51<16:40:20, 1.38s/it] 30%|██▉ | 18422/61904 [9:11:53<16:43:49, 1.39s/it] 30%|██▉ | 18423/61904 [9:11:54<16:36:39, 1.38s/it] 30%|██▉ | 18424/61904 [9:11:55<16:34:34, 1.37s/it] 30%|██▉ | 18425/61904 [9:11:57<16:43:13, 1.38s/it] 30%|██▉ | 18426/61904 [9:11:58<16:33:20, 1.37s/it] 30%|██▉ | 18427/61904 [9:12:00<16:31:27, 1.37s/it] 30%|██▉ | 18428/61904 [9:12:01<16:10:54, 1.34s/it] 30%|██▉ | 18429/61904 [9:12:02<16:04:31, 1.33s/it] 30%|██▉ | 18430/61904 [9:12:03<15:52:27, 1.31s/it] 30%|██▉ | 18431/61904 [9:12:05<16:10:25, 1.34s/it] 30%|██▉ | 18432/61904 [9:12:06<16:43:40, 1.39s/it] 30%|██▉ | 18433/61904 [9:12:08<16:51:08, 1.40s/it] 30%|██▉ | 18434/61904 [9:12:09<16:42:01, 1.38s/it] 30%|██▉ | 18435/61904 [9:12:10<16:41:52, 1.38s/it] 30%|██▉ | 18436/61904 [9:12:12<16:51:26, 1.40s/it] 30%|██▉ | 18437/61904 [9:12:13<16:17:49, 1.35s/it] 30%|██▉ | 18438/61904 [9:12:14<16:04:53, 1.33s/it] 30%|██▉ | 18439/61904 [9:12:16<16:21:54, 1.36s/it] 30%|██▉ | 18440/61904 [9:12:17<16:17:29, 1.35s/it] {'loss': 2.7545, 'learning_rate': 1.70439517697394e-07, 'epoch': 4.77} + 30%|██▉ | 18440/61904 [9:12:17<16:17:29, 1.35s/it] 30%|██▉ | 18441/61904 [9:12:18<16:03:41, 1.33s/it] 30%|██▉ | 18442/61904 [9:12:20<16:12:18, 1.34s/it] 30%|██▉ | 18443/61904 [9:12:21<16:01:30, 1.33s/it] 30%|██▉ | 18444/61904 [9:12:23<16:09:10, 1.34s/it] 30%|██▉ | 18445/61904 [9:12:24<16:18:25, 1.35s/it] 30%|██▉ | 18446/61904 [9:12:25<16:28:41, 1.37s/it] 30%|██▉ | 18447/61904 [9:12:27<16:35:43, 1.37s/it] 30%|██▉ | 18448/61904 [9:12:28<16:37:09, 1.38s/it] 30%|██▉ | 18449/61904 [9:12:30<17:06:24, 1.42s/it] 30%|██▉ | 18450/61904 [9:12:31<16:39:08, 1.38s/it] 30%|██▉ | 18451/61904 [9:12:32<16:26:47, 1.36s/it] 30%|██▉ | 18452/61904 [9:12:34<16:58:51, 1.41s/it] 30%|██▉ | 18453/61904 [9:12:35<17:30:55, 1.45s/it] 30%|██▉ | 18454/61904 [9:12:37<16:50:23, 1.40s/it] 30%|██▉ | 18455/61904 [9:12:38<16:34:16, 1.37s/it] 30%|██▉ | 18456/61904 [9:12:39<17:07:27, 1.42s/it] 30%|██▉ | 18457/61904 [9:12:41<17:10:17, 1.42s/it] 30%|██▉ | 18458/61904 [9:12:42<16:55:57, 1.40s/it] 30%|██▉ | 18459/61904 [9:12:44<16:45:40, 1.39s/it] 30%|██▉ | 18460/61904 [9:12:45<16:50:50, 1.40s/it] {'loss': 2.7371, 'learning_rate': 1.7040710488785166e-07, 'epoch': 4.77} + 30%|██▉ | 18460/61904 [9:12:45<16:50:50, 1.40s/it] 30%|██▉ | 18461/61904 [9:12:46<17:02:11, 1.41s/it] 30%|██▉ | 18462/61904 [9:12:48<17:24:47, 1.44s/it] 30%|██▉ | 18463/61904 [9:12:49<16:43:28, 1.39s/it] 30%|██▉ | 18464/61904 [9:12:51<16:36:30, 1.38s/it] 30%|██▉ | 18465/61904 [9:12:52<16:19:46, 1.35s/it] 30%|██▉ | 18466/61904 [9:12:53<16:12:15, 1.34s/it] 30%|██▉ | 18467/61904 [9:12:54<16:11:20, 1.34s/it] 30%|██▉ | 18468/61904 [9:12:56<16:45:40, 1.39s/it] 30%|██▉ | 18469/61904 [9:12:57<16:51:48, 1.40s/it] 30%|██▉ | 18470/61904 [9:12:59<16:29:33, 1.37s/it] 30%|██▉ | 18471/61904 [9:13:00<16:17:44, 1.35s/it] 30%|██▉ | 18472/61904 [9:13:01<16:46:33, 1.39s/it] 30%|██▉ | 18473/61904 [9:13:03<16:33:27, 1.37s/it] 30%|██▉ | 18474/61904 [9:13:04<16:36:50, 1.38s/it] 30%|██▉ | 18475/61904 [9:13:06<16:31:40, 1.37s/it] 30%|██▉ | 18476/61904 [9:13:07<16:39:03, 1.38s/it] 30%|██▉ | 18477/61904 [9:13:08<16:21:22, 1.36s/it] 30%|██▉ | 18478/61904 [9:13:10<16:23:39, 1.36s/it] 30%|██▉ | 18479/61904 [9:13:11<16:05:10, 1.33s/it] 30%|██▉ | 18480/61904 [9:13:12<15:56:11, 1.32s/it] {'loss': 2.6803, 'learning_rate': 1.7037469207830934e-07, 'epoch': 4.78} + 30%|██▉ | 18480/61904 [9:13:12<15:56:11, 1.32s/it] 30%|██▉ | 18481/61904 [9:13:14<16:15:00, 1.35s/it] 30%|██▉ | 18482/61904 [9:13:15<16:52:31, 1.40s/it] 30%|██▉ | 18483/61904 [9:13:17<16:58:33, 1.41s/it] 30%|██▉ | 18484/61904 [9:13:18<16:38:29, 1.38s/it] 30%|██▉ | 18485/61904 [9:13:19<16:30:37, 1.37s/it] 30%|██▉ | 18486/61904 [9:13:21<16:56:14, 1.40s/it] 30%|██▉ | 18487/61904 [9:13:22<16:45:03, 1.39s/it] 30%|██▉ | 18488/61904 [9:13:23<16:38:58, 1.38s/it] 30%|██▉ | 18489/61904 [9:13:25<17:09:09, 1.42s/it] 30%|██▉ | 18490/61904 [9:13:26<16:52:00, 1.40s/it] 30%|██▉ | 18491/61904 [9:13:28<16:33:24, 1.37s/it] 30%|██▉ | 18492/61904 [9:13:29<16:57:10, 1.41s/it] 30%|██▉ | 18493/61904 [9:13:30<16:34:51, 1.38s/it] 30%|██▉ | 18494/61904 [9:13:32<17:00:42, 1.41s/it] 30%|██▉ | 18495/61904 [9:13:33<16:54:13, 1.40s/it] 30%|██▉ | 18496/61904 [9:13:35<17:08:42, 1.42s/it] 30%|██▉ | 18497/61904 [9:13:36<17:09:10, 1.42s/it] 30%|██▉ | 18498/61904 [9:13:38<17:40:43, 1.47s/it] 30%|██▉ | 18499/61904 [9:13:39<17:17:37, 1.43s/it] 30%|██▉ | 18500/61904 [9:13:40<16:29:03, 1.37s/it] {'loss': 2.6713, 'learning_rate': 1.70342279268767e-07, 'epoch': 4.78} + 30%|██▉ | 18500/61904 [9:13:40<16:29:03, 1.37s/it] 30%|██▉ | 18501/61904 [9:13:42<16:16:37, 1.35s/it] 30%|██▉ | 18502/61904 [9:13:43<16:19:00, 1.35s/it] 30%|██▉ | 18503/61904 [9:13:44<16:06:55, 1.34s/it] 30%|██▉ | 18504/61904 [9:13:46<16:39:12, 1.38s/it] 30%|██▉ | 18505/61904 [9:13:47<16:21:41, 1.36s/it] 30%|██▉ | 18506/61904 [9:13:48<16:41:48, 1.39s/it] 30%|██▉ | 18507/61904 [9:13:50<16:30:09, 1.37s/it] 30%|██▉ | 18508/61904 [9:13:52<17:45:12, 1.47s/it] 30%|██▉ | 18509/61904 [9:13:53<17:06:09, 1.42s/it] 30%|██▉ | 18510/61904 [9:13:54<16:52:23, 1.40s/it] 30%|██▉ | 18511/61904 [9:13:55<16:26:14, 1.36s/it] 30%|██▉ | 18512/61904 [9:13:57<16:20:46, 1.36s/it] 30%|██▉ | 18513/61904 [9:13:58<16:17:13, 1.35s/it] 30%|██▉ | 18514/61904 [9:13:59<16:15:21, 1.35s/it] 30%|██▉ | 18515/61904 [9:14:01<15:58:58, 1.33s/it] 30%|██▉ | 18516/61904 [9:14:02<16:23:01, 1.36s/it] 30%|██▉ | 18517/61904 [9:14:04<17:11:02, 1.43s/it] 30%|██▉ | 18518/61904 [9:14:05<16:49:06, 1.40s/it] 30%|██▉ | 18519/61904 [9:14:07<17:52:10, 1.48s/it] 30%|██▉ | 18520/61904 [9:14:08<18:28:44, 1.53s/it] {'loss': 2.7047, 'learning_rate': 1.7030986645922467e-07, 'epoch': 4.79} + 30%|██▉ | 18520/61904 [9:14:08<18:28:44, 1.53s/it] 30%|██▉ | 18521/61904 [9:14:10<18:09:42, 1.51s/it] 30%|██▉ | 18522/61904 [9:14:11<18:11:24, 1.51s/it] 30%|██▉ | 18523/61904 [9:14:13<17:21:45, 1.44s/it] 30%|██▉ | 18524/61904 [9:14:14<16:43:50, 1.39s/it] 30%|██▉ | 18525/61904 [9:14:15<16:22:26, 1.36s/it] 30%|██▉ | 18526/61904 [9:14:17<16:23:08, 1.36s/it] 30%|██▉ | 18527/61904 [9:14:18<16:14:04, 1.35s/it] 30%|██▉ | 18528/61904 [9:14:19<16:26:46, 1.36s/it] 30%|██▉ | 18529/61904 [9:14:21<16:25:13, 1.36s/it] 30%|██▉ | 18530/61904 [9:14:22<16:36:48, 1.38s/it] 30%|██▉ | 18531/61904 [9:14:23<16:06:57, 1.34s/it] 30%|██▉ | 18532/61904 [9:14:25<16:23:53, 1.36s/it] 30%|██▉ | 18533/61904 [9:14:26<16:17:33, 1.35s/it] 30%|██▉ | 18534/61904 [9:14:27<16:29:30, 1.37s/it] 30%|██▉ | 18535/61904 [9:14:29<16:08:42, 1.34s/it] 30%|██▉ | 18536/61904 [9:14:30<15:59:56, 1.33s/it] 30%|██▉ | 18537/61904 [9:14:31<16:16:15, 1.35s/it] 30%|██▉ | 18538/61904 [9:14:33<16:02:23, 1.33s/it] 30%|██▉ | 18539/61904 [9:14:34<16:21:09, 1.36s/it] 30%|██▉ | 18540/61904 [9:14:36<16:19:27, 1.36s/it] {'loss': 2.6503, 'learning_rate': 1.7027745364968236e-07, 'epoch': 4.79} + 30%|██▉ | 18540/61904 [9:14:36<16:19:27, 1.36s/it] 30%|██▉ | 18541/61904 [9:14:37<16:30:50, 1.37s/it] 30%|██▉ | 18542/61904 [9:14:39<17:45:54, 1.47s/it] 30%|██▉ | 18543/61904 [9:14:40<16:56:16, 1.41s/it] 30%|██▉ | 18544/61904 [9:14:41<17:02:01, 1.41s/it] 30%|██▉ | 18545/61904 [9:14:43<17:03:36, 1.42s/it] 30%|██▉ | 18546/61904 [9:14:44<16:29:06, 1.37s/it] 30%|██▉ | 18547/61904 [9:14:45<16:07:09, 1.34s/it] 30%|██▉ | 18548/61904 [9:14:47<15:50:24, 1.32s/it] 30%|██▉ | 18549/61904 [9:14:48<16:31:21, 1.37s/it] 30%|██▉ | 18550/61904 [9:14:49<16:28:49, 1.37s/it] 30%|██▉ | 18551/61904 [9:14:51<17:12:33, 1.43s/it] 30%|██▉ | 18552/61904 [9:14:52<17:23:22, 1.44s/it] 30%|██▉ | 18553/61904 [9:14:54<17:28:25, 1.45s/it] 30%|██▉ | 18554/61904 [9:14:55<16:48:07, 1.40s/it] 30%|██▉ | 18555/61904 [9:14:57<17:18:36, 1.44s/it] 30%|██▉ | 18556/61904 [9:14:58<17:21:33, 1.44s/it] 30%|██▉ | 18557/61904 [9:15:00<17:13:29, 1.43s/it] 30%|██▉ | 18558/61904 [9:15:01<16:57:59, 1.41s/it] 30%|██▉ | 18559/61904 [9:15:02<16:38:44, 1.38s/it] 30%|██▉ | 18560/61904 [9:15:04<16:30:38, 1.37s/it] {'loss': 2.7162, 'learning_rate': 1.7024504084014002e-07, 'epoch': 4.8} + 30%|██▉ | 18560/61904 [9:15:04<16:30:38, 1.37s/it] 30%|██▉ | 18561/61904 [9:15:05<16:36:15, 1.38s/it] 30%|██▉ | 18562/61904 [9:15:07<17:49:03, 1.48s/it] 30%|██▉ | 18563/61904 [9:15:08<17:26:11, 1.45s/it] 30%|██▉ | 18564/61904 [9:15:10<17:25:09, 1.45s/it] 30%|██▉ | 18565/61904 [9:15:11<17:12:05, 1.43s/it] 30%|██▉ | 18566/61904 [9:15:12<16:45:37, 1.39s/it] 30%|██▉ | 18567/61904 [9:15:14<16:25:01, 1.36s/it] 30%|██▉ | 18568/61904 [9:15:15<16:41:58, 1.39s/it] 30%|██▉ | 18569/61904 [9:15:16<17:09:28, 1.43s/it] 30%|██▉ | 18570/61904 [9:15:18<17:06:25, 1.42s/it] 30%|██▉ | 18571/61904 [9:15:19<16:38:48, 1.38s/it] 30%|███ | 18572/61904 [9:15:20<16:16:37, 1.35s/it] 30%|███ | 18573/61904 [9:15:22<15:55:50, 1.32s/it] 30%|███ | 18574/61904 [9:15:23<16:11:52, 1.35s/it] 30%|███ | 18575/61904 [9:15:24<16:03:18, 1.33s/it] 30%|███ | 18576/61904 [9:15:26<16:46:58, 1.39s/it] 30%|███ | 18577/61904 [9:15:28<17:27:35, 1.45s/it] 30%|███ | 18578/61904 [9:15:29<16:58:25, 1.41s/it] 30%|███ | 18579/61904 [9:15:30<16:29:29, 1.37s/it] 30%|███ | 18580/61904 [9:15:31<16:05:14, 1.34s/it] {'loss': 2.7065, 'learning_rate': 1.7021262803059768e-07, 'epoch': 4.8} + 30%|███ | 18580/61904 [9:15:31<16:05:14, 1.34s/it] 30%|███ | 18581/61904 [9:15:33<16:22:47, 1.36s/it] 30%|███ | 18582/61904 [9:15:34<16:24:15, 1.36s/it] 30%|███ | 18583/61904 [9:15:36<16:33:04, 1.38s/it] 30%|███ | 18584/61904 [9:15:37<16:37:44, 1.38s/it] 30%|███ | 18585/61904 [9:15:38<16:26:37, 1.37s/it] 30%|███ | 18586/61904 [9:15:40<16:06:10, 1.34s/it] 30%|███ | 18587/61904 [9:15:41<16:25:24, 1.36s/it] 30%|███ | 18588/61904 [9:15:42<16:28:57, 1.37s/it] 30%|███ | 18589/61904 [9:15:44<16:19:24, 1.36s/it] 30%|███ | 18590/61904 [9:15:45<16:12:14, 1.35s/it] 30%|███ | 18591/61904 [9:15:46<16:20:02, 1.36s/it] 30%|███ | 18592/61904 [9:15:48<16:45:16, 1.39s/it] 30%|███ | 18593/61904 [9:15:49<16:33:25, 1.38s/it] 30%|███ | 18594/61904 [9:15:51<16:41:20, 1.39s/it] 30%|███ | 18595/61904 [9:15:52<16:22:01, 1.36s/it] 30%|███ | 18596/61904 [9:15:53<16:47:12, 1.40s/it] 30%|███ | 18597/61904 [9:15:55<16:56:06, 1.41s/it] 30%|███ | 18598/61904 [9:15:56<16:49:52, 1.40s/it] 30%|███ | 18599/61904 [9:15:58<17:04:08, 1.42s/it] 30%|███ | 18600/61904 [9:15:59<16:53:43, 1.40s/it] {'loss': 2.7029, 'learning_rate': 1.7018021522105534e-07, 'epoch': 4.81} + 30%|███ | 18600/61904 [9:15:59<16:53:43, 1.40s/it] 30%|███ | 18601/61904 [9:16:01<17:17:18, 1.44s/it] 30%|███ | 18602/61904 [9:16:02<18:38:00, 1.55s/it] 30%|███ | 18603/61904 [9:16:04<17:51:08, 1.48s/it] 30%|███ | 18604/61904 [9:16:05<17:28:40, 1.45s/it] 30%|███ | 18605/61904 [9:16:06<17:18:30, 1.44s/it] 30%|███ | 18606/61904 [9:16:08<16:49:00, 1.40s/it] 30%|███ | 18607/61904 [9:16:09<16:36:45, 1.38s/it] 30%|███ | 18608/61904 [9:16:10<16:21:21, 1.36s/it] 30%|███ | 18609/61904 [9:16:12<16:41:49, 1.39s/it] 30%|███ | 18610/61904 [9:16:13<17:06:43, 1.42s/it] 30%|███ | 18611/61904 [9:16:15<16:45:38, 1.39s/it] 30%|███ | 18612/61904 [9:16:16<16:13:32, 1.35s/it] 30%|███ | 18613/61904 [9:16:17<16:12:49, 1.35s/it] 30%|███ | 18614/61904 [9:16:19<17:22:55, 1.45s/it] 30%|███ | 18615/61904 [9:16:20<16:30:35, 1.37s/it] 30%|███ | 18616/61904 [9:16:22<16:37:10, 1.38s/it] 30%|███ | 18617/61904 [9:16:23<16:41:03, 1.39s/it] 30%|███ | 18618/61904 [9:16:25<17:35:44, 1.46s/it] 30%|███ | 18619/61904 [9:16:26<17:24:34, 1.45s/it] 30%|███ | 18620/61904 [9:16:27<17:20:31, 1.44s/it] {'loss': 2.6436, 'learning_rate': 1.70147802411513e-07, 'epoch': 4.81} + 30%|███ | 18620/61904 [9:16:27<17:20:31, 1.44s/it] 30%|███ | 18621/61904 [9:16:29<17:05:45, 1.42s/it] 30%|███ | 18622/61904 [9:16:30<17:18:36, 1.44s/it] 30%|███ | 18623/61904 [9:16:32<17:15:11, 1.44s/it] 30%|███ | 18624/61904 [9:16:33<17:04:08, 1.42s/it] 30%|███ | 18625/61904 [9:16:35<16:59:41, 1.41s/it] 30%|███ | 18626/61904 [9:16:36<16:35:35, 1.38s/it] 30%|███ | 18627/61904 [9:16:38<17:42:40, 1.47s/it] 30%|███ | 18628/61904 [9:16:39<17:25:34, 1.45s/it] 30%|███ | 18629/61904 [9:16:40<17:11:08, 1.43s/it] 30%|███ | 18630/61904 [9:16:42<17:17:24, 1.44s/it] 30%|███ | 18631/61904 [9:16:43<17:26:01, 1.45s/it] 30%|███ | 18632/61904 [9:16:45<16:58:14, 1.41s/it] 30%|███ | 18633/61904 [9:16:46<16:51:15, 1.40s/it] 30%|███ | 18634/61904 [9:16:47<16:21:21, 1.36s/it] 30%|███ | 18635/61904 [9:16:49<16:05:11, 1.34s/it] 30%|███ | 18636/61904 [9:16:50<16:20:52, 1.36s/it] 30%|███ | 18637/61904 [9:16:52<17:26:12, 1.45s/it] 30%|███ | 18638/61904 [9:16:53<17:01:51, 1.42s/it] 30%|███ | 18639/61904 [9:16:54<17:03:38, 1.42s/it] 30%|███ | 18640/61904 [9:16:56<17:02:38, 1.42s/it] {'loss': 2.6633, 'learning_rate': 1.701153896019707e-07, 'epoch': 4.82} + 30%|███ | 18640/61904 [9:16:56<17:02:38, 1.42s/it] 30%|███ | 18641/61904 [9:16:57<16:34:22, 1.38s/it] 30%|███ | 18642/61904 [9:16:58<16:15:38, 1.35s/it] 30%|███ | 18643/61904 [9:17:00<16:34:25, 1.38s/it] 30%|███ | 18644/61904 [9:17:01<16:44:51, 1.39s/it] 30%|███ | 18645/61904 [9:17:03<17:00:41, 1.42s/it] 30%|███ | 18646/61904 [9:17:04<16:44:05, 1.39s/it] 30%|███ | 18647/61904 [9:17:05<16:49:36, 1.40s/it] 30%|███ | 18648/61904 [9:17:07<17:15:25, 1.44s/it] 30%|███ | 18649/61904 [9:17:08<17:10:45, 1.43s/it] 30%|███ | 18650/61904 [9:17:10<17:24:17, 1.45s/it] 30%|███ | 18651/61904 [9:17:11<17:17:20, 1.44s/it] 30%|███ | 18652/61904 [9:17:13<16:55:30, 1.41s/it] 30%|███ | 18653/61904 [9:17:14<16:34:51, 1.38s/it] 30%|███ | 18654/61904 [9:17:15<16:53:35, 1.41s/it] 30%|███ | 18655/61904 [9:17:17<16:44:19, 1.39s/it] 30%|███ | 18656/61904 [9:17:18<16:29:45, 1.37s/it] 30%|███ | 18657/61904 [9:17:20<16:58:17, 1.41s/it] 30%|███ | 18658/61904 [9:17:21<16:50:40, 1.40s/it] 30%|███ | 18659/61904 [9:17:22<16:43:17, 1.39s/it] 30%|███ | 18660/61904 [9:17:24<16:19:21, 1.36s/it] {'loss': 2.6557, 'learning_rate': 1.7008297679242835e-07, 'epoch': 4.82} + 30%|███ | 18660/61904 [9:17:24<16:19:21, 1.36s/it] 30%|███ | 18661/61904 [9:17:25<16:19:24, 1.36s/it] 30%|███ | 18662/61904 [9:17:26<16:35:27, 1.38s/it] 30%|███ | 18663/61904 [9:17:28<16:45:03, 1.39s/it] 30%|███ | 18664/61904 [9:17:29<17:01:24, 1.42s/it] 30%|███ | 18665/61904 [9:17:31<17:19:59, 1.44s/it] 30%|███ | 18666/61904 [9:17:32<16:31:11, 1.38s/it] 30%|███ | 18667/61904 [9:17:33<16:25:46, 1.37s/it] 30%|███ | 18668/61904 [9:17:35<16:43:06, 1.39s/it] 30%|███ | 18669/61904 [9:17:36<16:27:59, 1.37s/it] 30%|███ | 18670/61904 [9:17:38<16:36:36, 1.38s/it] 30%|███ | 18671/61904 [9:17:39<16:54:41, 1.41s/it] 30%|███ | 18672/61904 [9:17:40<16:14:48, 1.35s/it] 30%|███ | 18673/61904 [9:17:42<15:57:34, 1.33s/it] 30%|███ | 18674/61904 [9:17:43<16:04:25, 1.34s/it] 30%|███ | 18675/61904 [9:17:44<16:04:53, 1.34s/it] 30%|███ | 18676/61904 [9:17:46<16:15:26, 1.35s/it] 30%|███ | 18677/61904 [9:17:47<16:38:46, 1.39s/it] 30%|███ | 18678/61904 [9:17:49<16:47:35, 1.40s/it] 30%|███ | 18679/61904 [9:17:50<17:21:39, 1.45s/it] 30%|███ | 18680/61904 [9:17:51<16:52:38, 1.41s/it] {'loss': 2.7084, 'learning_rate': 1.7005056398288602e-07, 'epoch': 4.83} + 30%|███ | 18680/61904 [9:17:51<16:52:38, 1.41s/it] 30%|███ | 18681/61904 [9:17:53<16:15:03, 1.35s/it] 30%|███ | 18682/61904 [9:17:54<16:04:10, 1.34s/it] 30%|███ | 18683/61904 [9:17:55<16:43:17, 1.39s/it] 30%|███ | 18684/61904 [9:17:57<16:34:59, 1.38s/it] 30%|███ | 18685/61904 [9:17:58<17:11:36, 1.43s/it] 30%|███ | 18686/61904 [9:18:00<17:22:47, 1.45s/it] 30%|███ | 18687/61904 [9:18:01<17:21:12, 1.45s/it] 30%|███ | 18688/61904 [9:18:03<16:49:07, 1.40s/it] 30%|███ | 18689/61904 [9:18:04<16:29:50, 1.37s/it] 30%|███ | 18690/61904 [9:18:05<16:24:21, 1.37s/it] 30%|███ | 18691/61904 [9:18:07<17:22:12, 1.45s/it] 30%|███ | 18692/61904 [9:18:08<17:30:19, 1.46s/it] 30%|███ | 18693/61904 [9:18:10<17:16:01, 1.44s/it] 30%|███ | 18694/61904 [9:18:11<17:08:49, 1.43s/it] 30%|███ | 18695/61904 [9:18:12<16:43:25, 1.39s/it] 30%|███ | 18696/61904 [9:18:14<16:36:53, 1.38s/it] 30%|███ | 18697/61904 [9:18:15<17:23:03, 1.45s/it] 30%|███ | 18698/61904 [9:18:17<16:49:26, 1.40s/it] 30%|███ | 18699/61904 [9:18:18<17:10:42, 1.43s/it] 30%|███ | 18700/61904 [9:18:19<16:32:46, 1.38s/it] {'loss': 2.6708, 'learning_rate': 1.700181511733437e-07, 'epoch': 4.83} + 30%|███ | 18700/61904 [9:18:19<16:32:46, 1.38s/it] 30%|███ | 18701/61904 [9:18:21<16:20:52, 1.36s/it] 30%|███ | 18702/61904 [9:18:22<16:15:50, 1.36s/it] 30%|███ | 18703/61904 [9:18:24<16:28:41, 1.37s/it] 30%|███ | 18704/61904 [9:18:25<16:08:02, 1.34s/it] 30%|███ | 18705/61904 [9:18:26<16:40:32, 1.39s/it] 30%|███ | 18706/61904 [9:18:28<16:17:33, 1.36s/it] 30%|███ | 18707/61904 [9:18:29<16:40:28, 1.39s/it] 30%|███ | 18708/61904 [9:18:30<16:41:34, 1.39s/it] 30%|███ | 18709/61904 [9:18:32<16:29:52, 1.37s/it] 30%|███ | 18710/61904 [9:18:33<16:54:39, 1.41s/it] 30%|███ | 18711/61904 [9:18:35<17:13:59, 1.44s/it] 30%|███ | 18712/61904 [9:18:36<17:40:33, 1.47s/it] 30%|███ | 18713/61904 [9:18:38<17:16:47, 1.44s/it] 30%|███ | 18714/61904 [9:18:39<17:03:13, 1.42s/it] 30%|███ | 18715/61904 [9:18:40<16:45:28, 1.40s/it] 30%|███ | 18716/61904 [9:18:42<17:06:56, 1.43s/it] 30%|███ | 18717/61904 [9:18:43<17:24:02, 1.45s/it] 30%|███ | 18718/61904 [9:18:45<17:22:16, 1.45s/it] 30%|███ | 18719/61904 [9:18:46<16:53:09, 1.41s/it] 30%|███ | 18720/61904 [9:18:48<17:00:38, 1.42s/it] {'loss': 2.7278, 'learning_rate': 1.6998573836380137e-07, 'epoch': 4.84} + 30%|███ | 18720/61904 [9:18:48<17:00:38, 1.42s/it] 30%|███ | 18721/61904 [9:18:49<17:37:02, 1.47s/it] 30%|███ | 18722/61904 [9:18:51<17:34:31, 1.47s/it] 30%|███ | 18723/61904 [9:18:52<17:18:48, 1.44s/it] 30%|███ | 18724/61904 [9:18:53<16:41:04, 1.39s/it] 30%|███ | 18725/61904 [9:18:55<16:47:00, 1.40s/it] 30%|███ | 18726/61904 [9:18:56<16:39:21, 1.39s/it] 30%|███ | 18727/61904 [9:18:58<17:55:26, 1.49s/it] 30%|███ | 18728/61904 [9:18:59<17:12:13, 1.43s/it] 30%|███ | 18729/61904 [9:19:00<16:40:06, 1.39s/it] 30%|███ | 18730/61904 [9:19:02<16:14:54, 1.35s/it] 30%|███ | 18731/61904 [9:19:03<16:14:27, 1.35s/it] 30%|███ | 18732/61904 [9:19:04<16:26:58, 1.37s/it] 30%|███ | 18733/61904 [9:19:06<16:47:13, 1.40s/it] 30%|███ | 18734/61904 [9:19:07<16:53:34, 1.41s/it] 30%|███ | 18735/61904 [9:19:09<16:46:35, 1.40s/it] 30%|███ | 18736/61904 [9:19:10<16:35:09, 1.38s/it] 30%|███ | 18737/61904 [9:19:11<16:37:10, 1.39s/it] 30%|███ | 18738/61904 [9:19:13<16:41:06, 1.39s/it] 30%|███ | 18739/61904 [9:19:14<16:39:12, 1.39s/it] 30%|███ | 18740/61904 [9:19:16<16:39:55, 1.39s/it] {'loss': 2.7249, 'learning_rate': 1.6995332555425903e-07, 'epoch': 4.84} + 30%|███ | 18740/61904 [9:19:16<16:39:55, 1.39s/it] 30%|███ | 18741/61904 [9:19:17<16:04:19, 1.34s/it] 30%|███ | 18742/61904 [9:19:18<16:08:14, 1.35s/it] 30%|███ | 18743/61904 [9:19:20<16:44:09, 1.40s/it] 30%|███ | 18744/61904 [9:19:21<16:46:25, 1.40s/it] 30%|███ | 18745/61904 [9:19:23<16:37:44, 1.39s/it] 30%|███ | 18746/61904 [9:19:24<16:43:45, 1.40s/it] 30%|███ | 18747/61904 [9:19:25<16:43:56, 1.40s/it] 30%|███ | 18748/61904 [9:19:27<17:11:17, 1.43s/it] 30%|███ | 18749/61904 [9:19:28<17:21:34, 1.45s/it] 30%|███ | 18750/61904 [9:19:30<17:07:08, 1.43s/it] 30%|███ | 18751/61904 [9:19:31<16:53:23, 1.41s/it] 30%|███ | 18752/61904 [9:19:32<16:25:13, 1.37s/it] 30%|███ | 18753/61904 [9:19:34<16:18:54, 1.36s/it] 30%|███ | 18754/61904 [9:19:35<16:20:57, 1.36s/it] 30%|███ | 18755/61904 [9:19:37<16:45:41, 1.40s/it] 30%|███ | 18756/61904 [9:19:38<16:40:37, 1.39s/it] 30%|███ | 18757/61904 [9:19:39<17:17:46, 1.44s/it] 30%|███ | 18758/61904 [9:19:41<17:16:32, 1.44s/it] 30%|███ | 18759/61904 [9:19:42<16:55:27, 1.41s/it] 30%|███ | 18760/61904 [9:19:44<16:34:40, 1.38s/it] {'loss': 2.6956, 'learning_rate': 1.6992091274471672e-07, 'epoch': 4.85} + 30%|███ | 18760/61904 [9:19:44<16:34:40, 1.38s/it] 30%|███ | 18761/61904 [9:19:45<16:21:57, 1.37s/it] 30%|███ | 18762/61904 [9:19:46<16:18:35, 1.36s/it] 30%|███ | 18763/61904 [9:19:48<16:11:56, 1.35s/it] 30%|███ | 18764/61904 [9:19:49<16:33:38, 1.38s/it] 30%|███ | 18765/61904 [9:19:50<16:30:36, 1.38s/it] 30%|███ | 18766/61904 [9:19:52<16:22:10, 1.37s/it] 30%|███ | 18767/61904 [9:19:53<16:42:11, 1.39s/it] 30%|███ | 18768/61904 [9:19:55<16:30:32, 1.38s/it] 30%|███ | 18769/61904 [9:19:56<16:42:13, 1.39s/it] 30%|███ | 18770/61904 [9:19:57<16:15:24, 1.36s/it] 30%|███ | 18771/61904 [9:19:59<16:42:11, 1.39s/it] 30%|███ | 18772/61904 [9:20:00<16:44:04, 1.40s/it] 30%|███ | 18773/61904 [9:20:02<16:57:31, 1.42s/it] 30%|███ | 18774/61904 [9:20:03<16:41:06, 1.39s/it] 30%|███ | 18775/61904 [9:20:04<16:48:19, 1.40s/it] 30%|███ | 18776/61904 [9:20:06<16:48:57, 1.40s/it] 30%|███ | 18777/61904 [9:20:07<16:53:33, 1.41s/it] 30%|███ | 18778/61904 [9:20:09<16:42:51, 1.40s/it] 30%|███ | 18779/61904 [9:20:10<16:57:51, 1.42s/it] 30%|███ | 18780/61904 [9:20:11<16:28:27, 1.38s/it] {'loss': 2.7182, 'learning_rate': 1.6988849993517438e-07, 'epoch': 4.85} + 30%|███ | 18780/61904 [9:20:11<16:28:27, 1.38s/it] 30%|███ | 18781/61904 [9:20:13<16:34:31, 1.38s/it] 30%|███ | 18782/61904 [9:20:14<16:27:49, 1.37s/it] 30%|███ | 18783/61904 [9:20:15<16:22:27, 1.37s/it] 30%|███ | 18784/61904 [9:20:17<16:40:39, 1.39s/it] 30%|███ | 18785/61904 [9:20:18<17:09:02, 1.43s/it] 30%|███ | 18786/61904 [9:20:20<16:32:35, 1.38s/it] 30%|███ | 18787/61904 [9:20:21<16:35:35, 1.39s/it] 30%|███ | 18788/61904 [9:20:22<16:49:45, 1.41s/it] 30%|███ | 18789/61904 [9:20:24<17:05:45, 1.43s/it] 30%|███ | 18790/61904 [9:20:25<16:50:52, 1.41s/it] 30%|███ | 18791/61904 [9:20:27<16:39:41, 1.39s/it] 30%|███ | 18792/61904 [9:20:28<16:50:27, 1.41s/it] 30%|███ | 18793/61904 [9:20:29<16:41:00, 1.39s/it] 30%|███ | 18794/61904 [9:20:31<16:35:37, 1.39s/it] 30%|███ | 18795/61904 [9:20:32<16:44:22, 1.40s/it] 30%|███ | 18796/61904 [9:20:34<17:15:03, 1.44s/it] 30%|███ | 18797/61904 [9:20:35<17:24:33, 1.45s/it] 30%|███ | 18798/61904 [9:20:37<17:08:42, 1.43s/it] 30%|███ | 18799/61904 [9:20:38<16:43:24, 1.40s/it] 30%|███ | 18800/61904 [9:20:39<17:02:38, 1.42s/it] {'loss': 2.6623, 'learning_rate': 1.6985608712563204e-07, 'epoch': 4.86} + 30%|███ | 18800/61904 [9:20:39<17:02:38, 1.42s/it] 30%|███ | 18801/61904 [9:20:41<17:05:38, 1.43s/it] 30%|███ | 18802/61904 [9:20:42<17:13:49, 1.44s/it] 30%|███ | 18803/61904 [9:20:44<17:17:53, 1.44s/it] 30%|███ | 18804/61904 [9:20:45<17:03:28, 1.42s/it] 30%|███ | 18805/61904 [9:20:47<16:46:38, 1.40s/it] 30%|███ | 18806/61904 [9:20:48<16:26:55, 1.37s/it] 30%|███ | 18807/61904 [9:20:49<16:37:41, 1.39s/it] 30%|███ | 18808/61904 [9:20:51<16:30:50, 1.38s/it] 30%|███ | 18809/61904 [9:20:52<16:35:44, 1.39s/it] 30%|███ | 18810/61904 [9:20:54<16:48:41, 1.40s/it] 30%|███ | 18811/61904 [9:20:55<16:46:16, 1.40s/it] 30%|███ | 18812/61904 [9:20:56<16:55:01, 1.41s/it] 30%|███ | 18813/61904 [9:20:58<17:21:40, 1.45s/it] 30%|███ | 18814/61904 [9:20:59<17:39:22, 1.48s/it] 30%|███ | 18815/61904 [9:21:01<17:39:28, 1.48s/it] 30%|███ | 18816/61904 [9:21:02<16:59:34, 1.42s/it] 30%|███ | 18817/61904 [9:21:03<16:27:14, 1.37s/it] 30%|███ | 18818/61904 [9:21:05<16:09:53, 1.35s/it] 30%|███ | 18819/61904 [9:21:06<15:55:19, 1.33s/it] 30%|███ | 18820/61904 [9:21:07<15:50:06, 1.32s/it] {'loss': 2.6152, 'learning_rate': 1.698236743160897e-07, 'epoch': 4.86} + 30%|███ | 18820/61904 [9:21:07<15:50:06, 1.32s/it] 30%|███ | 18821/61904 [9:21:09<16:19:34, 1.36s/it] 30%|███ | 18822/61904 [9:21:10<16:22:56, 1.37s/it] 30%|███ | 18823/61904 [9:21:12<16:14:50, 1.36s/it] 30%|███ | 18824/61904 [9:21:13<16:10:49, 1.35s/it] 30%|███ | 18825/61904 [9:21:14<16:37:06, 1.39s/it] 30%|███ | 18826/61904 [9:21:16<16:31:13, 1.38s/it] 30%|███ | 18827/61904 [9:21:17<16:42:05, 1.40s/it] 30%|███ | 18828/61904 [9:21:19<16:50:41, 1.41s/it] 30%|███ | 18829/61904 [9:21:20<17:35:49, 1.47s/it] 30%|███ | 18830/61904 [9:21:21<16:46:12, 1.40s/it] 30%|███ | 18831/61904 [9:21:23<16:24:25, 1.37s/it] 30%|███ | 18832/61904 [9:21:24<16:34:06, 1.38s/it] 30%|███ | 18833/61904 [9:21:25<16:23:37, 1.37s/it] 30%|███ | 18834/61904 [9:21:27<16:45:05, 1.40s/it] 30%|███ | 18835/61904 [9:21:28<16:48:09, 1.40s/it] 30%|███ | 18836/61904 [9:21:30<16:47:08, 1.40s/it] 30%|███ | 18837/61904 [9:21:31<16:25:52, 1.37s/it] 30%|███ | 18838/61904 [9:21:33<17:10:47, 1.44s/it] 30%|███ | 18839/61904 [9:21:34<17:23:01, 1.45s/it] 30%|███ | 18840/61904 [9:21:35<16:56:44, 1.42s/it] {'loss': 2.7202, 'learning_rate': 1.6979126150654736e-07, 'epoch': 4.87} + 30%|███ | 18840/61904 [9:21:35<16:56:44, 1.42s/it] 30%|███ | 18841/61904 [9:21:37<16:34:29, 1.39s/it] 30%|███ | 18842/61904 [9:21:38<16:44:37, 1.40s/it] 30%|███ | 18843/61904 [9:21:40<16:59:53, 1.42s/it] 30%|███ | 18844/61904 [9:21:41<16:36:12, 1.39s/it] 30%|███ | 18845/61904 [9:21:42<16:27:11, 1.38s/it] 30%|███ | 18846/61904 [9:21:44<16:44:02, 1.40s/it] 30%|███ | 18847/61904 [9:21:45<16:21:34, 1.37s/it] 30%|███ | 18848/61904 [9:21:47<16:40:15, 1.39s/it] 30%|███ | 18849/61904 [9:21:48<16:27:17, 1.38s/it] 30%|███ | 18850/61904 [9:21:49<16:19:03, 1.36s/it] 30%|███ | 18851/61904 [9:21:50<16:05:12, 1.35s/it] 30%|███ | 18852/61904 [9:21:52<16:04:39, 1.34s/it] 30%|███ | 18853/61904 [9:21:53<15:58:49, 1.34s/it] 30%|███ | 18854/61904 [9:21:55<16:10:56, 1.35s/it] 30%|███ | 18855/61904 [9:21:56<16:47:10, 1.40s/it] 30%|███ | 18856/61904 [9:21:57<16:34:09, 1.39s/it] 30%|███ | 18857/61904 [9:21:59<16:42:06, 1.40s/it] 30%|███ | 18858/61904 [9:22:00<16:39:34, 1.39s/it] 30%|███ | 18859/61904 [9:22:02<16:44:39, 1.40s/it] 30%|███ | 18860/61904 [9:22:03<17:05:51, 1.43s/it] {'loss': 2.7202, 'learning_rate': 1.6975884869700505e-07, 'epoch': 4.87} + 30%|███ | 18860/61904 [9:22:03<17:05:51, 1.43s/it] 30%|███ | 18861/61904 [9:22:05<17:32:37, 1.47s/it] 30%|███ | 18862/61904 [9:22:06<17:02:15, 1.43s/it] 30%|███ | 18863/61904 [9:22:07<17:00:56, 1.42s/it] 30%|███ | 18864/61904 [9:22:09<16:50:39, 1.41s/it] 30%|███ | 18865/61904 [9:22:10<16:31:39, 1.38s/it] 30%|███ | 18866/61904 [9:22:12<16:29:37, 1.38s/it] 30%|███ | 18867/61904 [9:22:13<16:05:54, 1.35s/it] 30%|███ | 18868/61904 [9:22:14<16:27:11, 1.38s/it] 30%|███ | 18869/61904 [9:22:16<16:39:23, 1.39s/it] 30%|███ | 18870/61904 [9:22:17<16:56:13, 1.42s/it] 30%|███ | 18871/61904 [9:22:18<16:26:54, 1.38s/it] 30%|███ | 18872/61904 [9:22:20<16:15:16, 1.36s/it] 30%|███ | 18873/61904 [9:22:21<15:58:34, 1.34s/it] 30%|███ | 18874/61904 [9:22:22<15:35:17, 1.30s/it] 30%|███ | 18875/61904 [9:22:24<16:35:55, 1.39s/it] 30%|███ | 18876/61904 [9:22:25<16:51:29, 1.41s/it] 30%|███ | 18877/61904 [9:22:27<17:10:48, 1.44s/it] 30%|███ | 18878/61904 [9:22:28<16:50:33, 1.41s/it] 30%|███ | 18879/61904 [9:22:30<17:36:09, 1.47s/it] 30%|███ | 18880/61904 [9:22:32<18:44:05, 1.57s/it] {'loss': 2.6912, 'learning_rate': 1.6972643588746271e-07, 'epoch': 4.88} + 30%|███ | 18880/61904 [9:22:32<18:44:05, 1.57s/it] 31%|███ | 18881/61904 [9:22:33<17:39:26, 1.48s/it] 31%|███ | 18882/61904 [9:22:34<17:20:31, 1.45s/it] 31%|███ | 18883/61904 [9:22:36<17:13:03, 1.44s/it] 31%|███ | 18884/61904 [9:22:37<16:36:47, 1.39s/it] 31%|███ | 18885/61904 [9:22:38<16:43:00, 1.40s/it] 31%|███ | 18886/61904 [9:22:40<17:11:51, 1.44s/it] 31%|███ | 18887/61904 [9:22:41<17:34:36, 1.47s/it] 31%|███ | 18888/61904 [9:22:43<17:32:48, 1.47s/it] 31%|███ | 18889/61904 [9:22:44<16:55:30, 1.42s/it] 31%|███ | 18890/61904 [9:22:46<17:26:37, 1.46s/it] 31%|███ | 18891/61904 [9:22:47<16:57:40, 1.42s/it] 31%|███ | 18892/61904 [9:22:48<16:29:25, 1.38s/it] 31%|███ | 18893/61904 [9:22:50<16:57:39, 1.42s/it] 31%|███ | 18894/61904 [9:22:51<16:56:04, 1.42s/it] 31%|███ | 18895/61904 [9:22:53<16:30:58, 1.38s/it] 31%|███ | 18896/61904 [9:22:54<17:07:07, 1.43s/it] 31%|███ | 18897/61904 [9:22:56<17:07:45, 1.43s/it] 31%|███ | 18898/61904 [9:22:57<16:41:49, 1.40s/it] 31%|███ | 18899/61904 [9:22:58<16:44:12, 1.40s/it] 31%|███ | 18900/61904 [9:23:00<16:42:29, 1.40s/it] {'loss': 2.6118, 'learning_rate': 1.6969402307792038e-07, 'epoch': 4.88} + 31%|███ | 18900/61904 [9:23:00<16:42:29, 1.40s/it] 31%|███ | 18901/61904 [9:23:01<16:40:00, 1.40s/it] 31%|███ | 18902/61904 [9:23:03<17:01:48, 1.43s/it] 31%|███ | 18903/61904 [9:23:04<16:20:02, 1.37s/it] 31%|███ | 18904/61904 [9:23:05<16:21:17, 1.37s/it] 31%|███ | 18905/61904 [9:23:06<16:13:48, 1.36s/it] 31%|███ | 18906/61904 [9:23:08<16:27:01, 1.38s/it] 31%|███ | 18907/61904 [9:23:09<16:47:26, 1.41s/it] 31%|███ | 18908/61904 [9:23:11<16:33:49, 1.39s/it] 31%|███ | 18909/61904 [9:23:12<15:57:50, 1.34s/it] 31%|███ | 18910/61904 [9:23:13<16:33:09, 1.39s/it] 31%|███ | 18911/61904 [9:23:15<16:39:17, 1.39s/it] 31%|███ | 18912/61904 [9:23:16<16:13:59, 1.36s/it] 31%|███ | 18913/61904 [9:23:17<15:59:43, 1.34s/it] 31%|███ | 18914/61904 [9:23:19<15:46:12, 1.32s/it] 31%|███ | 18915/61904 [9:23:20<15:35:40, 1.31s/it] 31%|███ | 18916/61904 [9:23:21<15:18:19, 1.28s/it] 31%|███ | 18917/61904 [9:23:23<15:37:23, 1.31s/it] 31%|███ | 18918/61904 [9:23:24<15:29:57, 1.30s/it] 31%|███ | 18919/61904 [9:23:25<15:54:22, 1.33s/it] 31%|███ | 18920/61904 [9:23:27<16:13:54, 1.36s/it] {'loss': 2.7578, 'learning_rate': 1.6966161026837806e-07, 'epoch': 4.89} + 31%|███ | 18920/61904 [9:23:27<16:13:54, 1.36s/it] 31%|███ | 18921/61904 [9:23:28<16:28:45, 1.38s/it] 31%|███ | 18922/61904 [9:23:30<16:41:35, 1.40s/it] 31%|███ | 18923/61904 [9:23:31<17:10:39, 1.44s/it] 31%|███ | 18924/61904 [9:23:33<17:19:53, 1.45s/it] 31%|███ | 18925/61904 [9:23:34<17:48:30, 1.49s/it] 31%|███ | 18926/61904 [9:23:36<17:42:03, 1.48s/it] 31%|███ | 18927/61904 [9:23:37<16:56:54, 1.42s/it] 31%|███ | 18928/61904 [9:23:38<16:58:15, 1.42s/it] 31%|███ | 18929/61904 [9:23:40<16:57:30, 1.42s/it] 31%|███ | 18930/61904 [9:23:41<16:41:04, 1.40s/it] 31%|███ | 18931/61904 [9:23:42<16:49:42, 1.41s/it] 31%|███ | 18932/61904 [9:23:44<16:12:24, 1.36s/it] 31%|███ | 18933/61904 [9:23:45<16:06:57, 1.35s/it] 31%|███ | 18934/61904 [9:23:46<16:08:15, 1.35s/it] 31%|███ | 18935/61904 [9:23:48<15:49:25, 1.33s/it] 31%|███ | 18936/61904 [9:23:49<16:00:00, 1.34s/it] 31%|███ | 18937/61904 [9:23:50<15:56:58, 1.34s/it] 31%|███ | 18938/61904 [9:23:52<15:45:59, 1.32s/it] 31%|███ | 18939/61904 [9:23:53<16:09:41, 1.35s/it] 31%|███ | 18940/61904 [9:23:54<15:56:04, 1.34s/it] {'loss': 2.6587, 'learning_rate': 1.6962919745883573e-07, 'epoch': 4.89} + 31%|███ | 18940/61904 [9:23:54<15:56:04, 1.34s/it] 31%|███ | 18941/61904 [9:23:56<15:45:44, 1.32s/it] 31%|███ | 18942/61904 [9:23:57<15:54:57, 1.33s/it] 31%|███ | 18943/61904 [9:23:58<15:51:12, 1.33s/it] 31%|███ | 18944/61904 [9:24:00<16:14:54, 1.36s/it] 31%|███ | 18945/61904 [9:24:01<16:07:18, 1.35s/it] 31%|███ | 18946/61904 [9:24:02<15:52:25, 1.33s/it] 31%|███ | 18947/61904 [9:24:04<16:19:39, 1.37s/it] 31%|███ | 18948/61904 [9:24:05<16:32:47, 1.39s/it] 31%|███ | 18949/61904 [9:24:07<16:33:34, 1.39s/it] 31%|███ | 18950/61904 [9:24:08<16:52:12, 1.41s/it] 31%|███ | 18951/61904 [9:24:10<16:56:38, 1.42s/it] 31%|███ | 18952/61904 [9:24:11<17:01:08, 1.43s/it] 31%|███ | 18953/61904 [9:24:12<16:56:15, 1.42s/it] 31%|███ | 18954/61904 [9:24:14<17:09:11, 1.44s/it] 31%|███ | 18955/61904 [9:24:15<16:32:39, 1.39s/it] 31%|███ | 18956/61904 [9:24:17<16:50:20, 1.41s/it] 31%|███ | 18957/61904 [9:24:18<16:35:15, 1.39s/it] 31%|███ | 18958/61904 [9:24:19<16:32:58, 1.39s/it] 31%|███ | 18959/61904 [9:24:21<16:07:58, 1.35s/it] 31%|███ | 18960/61904 [9:24:22<16:21:06, 1.37s/it] {'loss': 2.6077, 'learning_rate': 1.695967846492934e-07, 'epoch': 4.9} + 31%|███ | 18960/61904 [9:24:22<16:21:06, 1.37s/it] 31%|███ | 18961/61904 [9:24:24<17:08:54, 1.44s/it] 31%|███ | 18962/61904 [9:24:25<17:08:56, 1.44s/it] 31%|███ | 18963/61904 [9:24:27<17:14:17, 1.45s/it] 31%|███ | 18964/61904 [9:24:28<17:29:35, 1.47s/it] 31%|███ | 18965/61904 [9:24:30<18:18:36, 1.54s/it] 31%|███ | 18966/61904 [9:24:31<17:38:09, 1.48s/it] 31%|███ | 18967/61904 [9:24:32<17:07:14, 1.44s/it] 31%|███ | 18968/61904 [9:24:34<16:51:04, 1.41s/it] 31%|███ | 18969/61904 [9:24:35<16:52:34, 1.42s/it] 31%|███ | 18970/61904 [9:24:37<16:55:57, 1.42s/it] 31%|███ | 18971/61904 [9:24:38<16:48:00, 1.41s/it] 31%|███ | 18972/61904 [9:24:39<16:53:15, 1.42s/it] 31%|███ | 18973/61904 [9:24:41<16:43:01, 1.40s/it] 31%|███ | 18974/61904 [9:24:42<16:30:08, 1.38s/it] 31%|███ | 18975/61904 [9:24:43<15:57:16, 1.34s/it] 31%|███ | 18976/61904 [9:24:45<15:55:10, 1.34s/it] 31%|███ | 18977/61904 [9:24:46<15:57:07, 1.34s/it] 31%|███ | 18978/61904 [9:24:48<16:26:48, 1.38s/it] 31%|███ | 18979/61904 [9:24:49<16:42:19, 1.40s/it] 31%|███ | 18980/61904 [9:24:50<16:51:05, 1.41s/it] {'loss': 2.7021, 'learning_rate': 1.6956437183975108e-07, 'epoch': 4.91} + 31%|███ | 18980/61904 [9:24:50<16:51:05, 1.41s/it] 31%|███ | 18981/61904 [9:24:52<16:50:26, 1.41s/it] 31%|███ | 18982/61904 [9:24:53<16:46:24, 1.41s/it] 31%|███ | 18983/61904 [9:24:55<17:31:40, 1.47s/it] 31%|███ | 18984/61904 [9:24:56<17:32:42, 1.47s/it] 31%|███ | 18985/61904 [9:24:58<16:50:39, 1.41s/it] 31%|███ | 18986/61904 [9:24:59<16:55:03, 1.42s/it] 31%|███ | 18987/61904 [9:25:01<17:08:21, 1.44s/it] 31%|███ | 18988/61904 [9:25:02<16:47:41, 1.41s/it] 31%|███ | 18989/61904 [9:25:03<16:42:59, 1.40s/it] 31%|███ | 18990/61904 [9:25:05<16:53:28, 1.42s/it] 31%|███ | 18991/61904 [9:25:06<16:57:46, 1.42s/it] 31%|███ | 18992/61904 [9:25:08<17:18:16, 1.45s/it] 31%|███ | 18993/61904 [9:25:09<17:04:49, 1.43s/it] 31%|███ | 18994/61904 [9:25:10<16:57:15, 1.42s/it] 31%|███ | 18995/61904 [9:25:12<17:07:56, 1.44s/it] 31%|███ | 18996/61904 [9:25:13<17:10:38, 1.44s/it] 31%|███ | 18997/61904 [9:25:15<17:29:11, 1.47s/it] 31%|███ | 18998/61904 [9:25:16<17:36:42, 1.48s/it] 31%|███ | 18999/61904 [9:25:18<16:45:13, 1.41s/it] 31%|███ | 19000/61904 [9:25:19<16:30:20, 1.38s/it] {'loss': 2.6659, 'learning_rate': 1.6953195903020874e-07, 'epoch': 4.91} + 31%|███ | 19000/61904 [9:25:19<16:30:20, 1.38s/it] 31%|███ | 19001/61904 [9:25:21<17:38:09, 1.48s/it] 31%|███ | 19002/61904 [9:25:22<17:25:14, 1.46s/it] 31%|███ | 19003/61904 [9:25:24<17:29:43, 1.47s/it] 31%|███ | 19004/61904 [9:25:25<16:58:05, 1.42s/it] 31%|███ | 19005/61904 [9:25:26<16:54:03, 1.42s/it] 31%|███ | 19006/61904 [9:25:28<16:49:47, 1.41s/it] 31%|███ | 19007/61904 [9:25:29<17:00:02, 1.43s/it] 31%|███ | 19008/61904 [9:25:31<16:48:24, 1.41s/it] 31%|███ | 19009/61904 [9:25:32<16:39:12, 1.40s/it] 31%|███ | 19010/61904 [9:25:33<17:03:53, 1.43s/it] 31%|███ | 19011/61904 [9:25:35<16:36:46, 1.39s/it] 31%|███ | 19012/61904 [9:25:36<16:18:08, 1.37s/it] 31%|███ | 19013/61904 [9:25:38<16:36:56, 1.39s/it] 31%|███ | 19014/61904 [9:25:39<16:55:11, 1.42s/it] 31%|███ | 19015/61904 [9:25:40<17:06:09, 1.44s/it] 31%|███ | 19016/61904 [9:25:42<17:12:26, 1.44s/it] 31%|███ | 19017/61904 [9:25:43<17:21:30, 1.46s/it] 31%|███ | 19018/61904 [9:25:45<17:04:00, 1.43s/it] 31%|███ | 19019/61904 [9:25:46<16:44:12, 1.40s/it] 31%|███ | 19020/61904 [9:25:47<16:26:10, 1.38s/it] {'loss': 2.6508, 'learning_rate': 1.694995462206664e-07, 'epoch': 4.92} + 31%|███ | 19020/61904 [9:25:47<16:26:10, 1.38s/it] 31%|███ | 19021/61904 [9:25:49<16:33:45, 1.39s/it] 31%|███ | 19022/61904 [9:25:50<16:19:39, 1.37s/it] 31%|███ | 19023/61904 [9:25:51<15:55:03, 1.34s/it] 31%|███ | 19024/61904 [9:25:53<16:07:55, 1.35s/it] 31%|███ | 19025/61904 [9:25:54<16:37:28, 1.40s/it] 31%|███ | 19026/61904 [9:25:56<16:09:24, 1.36s/it] 31%|███ | 19027/61904 [9:25:57<16:32:55, 1.39s/it] 31%|███ | 19028/61904 [9:25:58<16:07:18, 1.35s/it] 31%|███ | 19029/61904 [9:26:00<16:06:24, 1.35s/it] 31%|███ | 19030/61904 [9:26:01<16:08:40, 1.36s/it] 31%|███ | 19031/61904 [9:26:02<15:51:32, 1.33s/it] 31%|███ | 19032/61904 [9:26:04<16:07:55, 1.35s/it] 31%|███ | 19033/61904 [9:26:05<16:11:40, 1.36s/it] 31%|███ | 19034/61904 [9:26:06<16:00:58, 1.34s/it] 31%|███ | 19035/61904 [9:26:08<16:06:25, 1.35s/it] 31%|███ | 19036/61904 [9:26:09<16:48:22, 1.41s/it] 31%|███ | 19037/61904 [9:26:11<16:29:49, 1.39s/it] 31%|███ | 19038/61904 [9:26:12<16:47:17, 1.41s/it] 31%|███ | 19039/61904 [9:26:14<16:53:05, 1.42s/it] 31%|███ | 19040/61904 [9:26:15<16:28:47, 1.38s/it] {'loss': 2.7058, 'learning_rate': 1.6946713341112406e-07, 'epoch': 4.92} + 31%|███ | 19040/61904 [9:26:15<16:28:47, 1.38s/it] 31%|███ | 19041/61904 [9:26:16<16:47:14, 1.41s/it] 31%|███ | 19042/61904 [9:26:18<17:26:07, 1.46s/it] 31%|███ | 19043/61904 [9:26:19<16:47:16, 1.41s/it] 31%|███ | 19044/61904 [9:26:21<16:45:43, 1.41s/it] 31%|███ | 19045/61904 [9:26:22<17:09:02, 1.44s/it] 31%|███ | 19046/61904 [9:26:23<16:46:43, 1.41s/it] 31%|███ | 19047/61904 [9:26:25<16:42:52, 1.40s/it] 31%|███ | 19048/61904 [9:26:26<16:33:16, 1.39s/it] 31%|███ | 19049/61904 [9:26:28<16:24:42, 1.38s/it] 31%|███ | 19050/61904 [9:26:29<16:10:23, 1.36s/it] 31%|███ | 19051/61904 [9:26:30<16:08:49, 1.36s/it] 31%|███ | 19052/61904 [9:26:32<15:54:53, 1.34s/it] 31%|███ | 19053/61904 [9:26:33<15:57:35, 1.34s/it] 31%|███ | 19054/61904 [9:26:34<16:00:23, 1.34s/it] 31%|███ | 19055/61904 [9:26:36<16:10:45, 1.36s/it] 31%|███ | 19056/61904 [9:26:37<16:09:03, 1.36s/it] 31%|███ | 19057/61904 [9:26:38<16:20:19, 1.37s/it] 31%|███ | 19058/61904 [9:26:40<16:14:33, 1.36s/it] 31%|███ | 19059/61904 [9:26:41<16:28:52, 1.38s/it] 31%|███ | 19060/61904 [9:26:43<16:30:32, 1.39s/it] {'loss': 2.7142, 'learning_rate': 1.6943472060158172e-07, 'epoch': 4.93} + 31%|███ | 19060/61904 [9:26:43<16:30:32, 1.39s/it] 31%|███ | 19061/61904 [9:26:44<16:05:12, 1.35s/it] 31%|███ | 19062/61904 [9:26:45<16:06:16, 1.35s/it] 31%|███ | 19063/61904 [9:26:47<16:29:03, 1.39s/it] 31%|███ | 19064/61904 [9:26:48<16:40:44, 1.40s/it] 31%|███ | 19065/61904 [9:26:49<16:22:24, 1.38s/it] 31%|███ | 19066/61904 [9:26:51<16:22:41, 1.38s/it] 31%|███ | 19067/61904 [9:26:52<16:18:37, 1.37s/it] 31%|███ | 19068/61904 [9:26:54<16:19:19, 1.37s/it] 31%|███ | 19069/61904 [9:26:55<16:31:59, 1.39s/it] 31%|███ | 19070/61904 [9:26:56<16:52:47, 1.42s/it] 31%|███ | 19071/61904 [9:26:58<17:14:51, 1.45s/it] 31%|███ | 19072/61904 [9:26:59<17:06:30, 1.44s/it] 31%|███ | 19073/61904 [9:27:01<16:44:54, 1.41s/it] 31%|███ | 19074/61904 [9:27:02<16:49:36, 1.41s/it] 31%|███ | 19075/61904 [9:27:03<16:41:27, 1.40s/it] 31%|███ | 19076/61904 [9:27:05<16:50:01, 1.41s/it] 31%|███ | 19077/61904 [9:27:06<16:49:59, 1.41s/it] 31%|███ | 19078/61904 [9:27:08<17:05:25, 1.44s/it] 31%|███ | 19079/61904 [9:27:09<17:01:03, 1.43s/it] 31%|███ | 19080/61904 [9:27:11<16:20:44, 1.37s/it] {'loss': 2.6591, 'learning_rate': 1.694023077920394e-07, 'epoch': 4.93} + 31%|███ | 19080/61904 [9:27:11<16:20:44, 1.37s/it] 31%|███ | 19081/61904 [9:27:12<16:05:11, 1.35s/it] 31%|███ | 19082/61904 [9:27:13<15:54:36, 1.34s/it] 31%|███ | 19083/61904 [9:27:14<15:42:27, 1.32s/it] 31%|███ | 19084/61904 [9:27:16<15:46:13, 1.33s/it] 31%|███ | 19085/61904 [9:27:17<16:29:01, 1.39s/it] 31%|███ | 19086/61904 [9:27:19<16:09:15, 1.36s/it] 31%|███ | 19087/61904 [9:27:20<16:57:26, 1.43s/it] 31%|███ | 19088/61904 [9:27:21<16:33:25, 1.39s/it] 31%|███ | 19089/61904 [9:27:23<16:54:53, 1.42s/it] 31%|███ | 19090/61904 [9:27:24<17:09:35, 1.44s/it] 31%|███ | 19091/61904 [9:27:26<17:15:03, 1.45s/it] 31%|███ | 19092/61904 [9:27:27<16:37:44, 1.40s/it] 31%|███ | 19093/61904 [9:27:28<16:20:28, 1.37s/it] 31%|███ | 19094/61904 [9:27:30<16:20:58, 1.37s/it] 31%|███ | 19095/61904 [9:27:31<16:19:57, 1.37s/it] 31%|███ | 19096/61904 [9:27:33<16:32:47, 1.39s/it] 31%|███ | 19097/61904 [9:27:34<17:16:15, 1.45s/it] 31%|███ | 19098/61904 [9:27:36<17:01:53, 1.43s/it] 31%|███ | 19099/61904 [9:27:37<17:21:27, 1.46s/it] 31%|███ | 19100/61904 [9:27:39<16:59:50, 1.43s/it] {'loss': 2.6391, 'learning_rate': 1.6936989498249707e-07, 'epoch': 4.94} + 31%|███ | 19100/61904 [9:27:39<16:59:50, 1.43s/it] 31%|███ | 19101/61904 [9:27:40<16:37:06, 1.40s/it] 31%|███ | 19102/61904 [9:27:41<16:49:26, 1.42s/it] 31%|███ | 19103/61904 [9:27:43<16:43:13, 1.41s/it] 31%|███ | 19104/61904 [9:27:44<16:52:10, 1.42s/it] 31%|███ | 19105/61904 [9:27:45<16:31:24, 1.39s/it] 31%|███ | 19106/61904 [9:27:47<16:21:41, 1.38s/it] 31%|███ | 19107/61904 [9:27:48<16:36:20, 1.40s/it] 31%|███ | 19108/61904 [9:27:50<16:19:33, 1.37s/it] 31%|███ | 19109/61904 [9:27:51<16:07:01, 1.36s/it] 31%|███ | 19110/61904 [9:27:52<15:55:38, 1.34s/it] 31%|███ | 19111/61904 [9:27:54<16:20:26, 1.37s/it] 31%|███ | 19112/61904 [9:27:55<16:25:53, 1.38s/it] 31%|███ | 19113/61904 [9:27:57<16:45:25, 1.41s/it] 31%|███ | 19114/61904 [9:27:58<16:30:04, 1.39s/it] 31%|███ | 19115/61904 [9:27:59<17:01:17, 1.43s/it] 31%|███ | 19116/61904 [9:28:01<16:36:45, 1.40s/it] 31%|███ | 19117/61904 [9:28:02<16:39:30, 1.40s/it] 31%|███ | 19118/61904 [9:28:03<16:15:40, 1.37s/it] 31%|███ | 19119/61904 [9:28:05<16:04:11, 1.35s/it] 31%|███ | 19120/61904 [9:28:06<16:25:21, 1.38s/it] {'loss': 2.7261, 'learning_rate': 1.6933748217295474e-07, 'epoch': 4.94} + 31%|███ | 19120/61904 [9:28:06<16:25:21, 1.38s/it] 31%|███ | 19121/61904 [9:28:08<16:50:22, 1.42s/it] 31%|███ | 19122/61904 [9:28:09<16:42:47, 1.41s/it] 31%|███ | 19123/61904 [9:28:10<16:29:16, 1.39s/it] 31%|███ | 19124/61904 [9:28:12<16:51:18, 1.42s/it] 31%|███ | 19125/61904 [9:28:13<16:36:46, 1.40s/it] 31%|███ | 19126/61904 [9:28:15<16:26:36, 1.38s/it] 31%|███ | 19127/61904 [9:28:16<16:30:53, 1.39s/it] 31%|███ | 19128/61904 [9:28:17<15:42:55, 1.32s/it] 31%|███ | 19129/61904 [9:28:19<16:24:39, 1.38s/it] 31%|███ | 19130/61904 [9:28:20<16:15:32, 1.37s/it] 31%|███ | 19131/61904 [9:28:21<16:22:00, 1.38s/it] 31%|███ | 19132/61904 [9:28:23<16:43:47, 1.41s/it] 31%|███ | 19133/61904 [9:28:24<16:42:54, 1.41s/it] 31%|███ | 19134/61904 [9:28:26<16:29:50, 1.39s/it] 31%|███ | 19135/61904 [9:28:27<16:40:26, 1.40s/it] 31%|███ | 19136/61904 [9:28:29<16:56:30, 1.43s/it] 31%|███ | 19137/61904 [9:28:30<16:47:33, 1.41s/it] 31%|███ | 19138/61904 [9:28:31<16:23:17, 1.38s/it] 31%|███ | 19139/61904 [9:28:33<16:11:29, 1.36s/it] 31%|███ | 19140/61904 [9:28:34<16:07:46, 1.36s/it] {'loss': 2.6897, 'learning_rate': 1.6930506936341242e-07, 'epoch': 4.95} + 31%|███ | 19140/61904 [9:28:34<16:07:46, 1.36s/it] 31%|███ | 19141/61904 [9:28:35<15:46:34, 1.33s/it] 31%|███ | 19142/61904 [9:28:37<15:57:43, 1.34s/it] 31%|███ | 19143/61904 [9:28:38<15:58:48, 1.35s/it] 31%|███ | 19144/61904 [9:28:39<15:46:33, 1.33s/it] 31%|███ | 19145/61904 [9:28:41<16:21:31, 1.38s/it] 31%|███ | 19146/61904 [9:28:42<16:20:09, 1.38s/it] 31%|███ | 19147/61904 [9:28:44<16:51:15, 1.42s/it] 31%|███ | 19148/61904 [9:28:45<16:11:26, 1.36s/it] 31%|███ | 19149/61904 [9:28:46<16:46:04, 1.41s/it] 31%|███ | 19150/61904 [9:28:48<16:20:39, 1.38s/it] 31%|███ | 19151/61904 [9:28:49<15:53:02, 1.34s/it] 31%|███ | 19152/61904 [9:28:50<15:34:57, 1.31s/it] 31%|███ | 19153/61904 [9:28:51<15:40:08, 1.32s/it] 31%|███ | 19154/61904 [9:28:53<15:29:52, 1.31s/it] 31%|███ | 19155/61904 [9:28:54<15:51:02, 1.33s/it] 31%|███ | 19156/61904 [9:28:56<16:07:38, 1.36s/it] 31%|███ | 19157/61904 [9:28:57<15:58:27, 1.35s/it] 31%|███ | 19158/61904 [9:28:58<16:00:10, 1.35s/it] 31%|███ | 19159/61904 [9:29:00<15:47:15, 1.33s/it] 31%|███ | 19160/61904 [9:29:01<15:51:52, 1.34s/it] {'loss': 2.6762, 'learning_rate': 1.6927265655387009e-07, 'epoch': 4.95} + 31%|███ | 19160/61904 [9:29:01<15:51:52, 1.34s/it] 31%|███ | 19161/61904 [9:29:02<16:48:33, 1.42s/it] 31%|███ | 19162/61904 [9:29:04<16:24:45, 1.38s/it] 31%|███ | 19163/61904 [9:29:05<16:11:22, 1.36s/it] 31%|███ | 19164/61904 [9:29:07<16:50:18, 1.42s/it] 31%|███ | 19165/61904 [9:29:08<16:26:32, 1.38s/it] 31%|███ | 19166/61904 [9:29:09<16:33:54, 1.40s/it] 31%|███ | 19167/61904 [9:29:11<16:31:49, 1.39s/it] 31%|███ | 19168/61904 [9:29:12<16:40:35, 1.40s/it] 31%|███ | 19169/61904 [9:29:14<16:32:30, 1.39s/it] 31%|███ | 19170/61904 [9:29:15<16:07:26, 1.36s/it] 31%|███ | 19171/61904 [9:29:16<16:55:46, 1.43s/it] 31%|███ | 19172/61904 [9:29:18<16:42:15, 1.41s/it] 31%|███ | 19173/61904 [9:29:19<16:53:29, 1.42s/it] 31%|███ | 19174/61904 [9:29:21<16:58:01, 1.43s/it] 31%|███ | 19175/61904 [9:29:22<16:53:06, 1.42s/it] 31%|███ | 19176/61904 [9:29:24<17:06:27, 1.44s/it] 31%|███ | 19177/61904 [9:29:25<17:43:06, 1.49s/it] 31%|███ | 19178/61904 [9:29:27<17:38:17, 1.49s/it] 31%|███ | 19179/61904 [9:29:28<17:34:43, 1.48s/it] 31%|███ | 19180/61904 [9:29:29<16:45:23, 1.41s/it] {'loss': 2.6687, 'learning_rate': 1.6924024374432775e-07, 'epoch': 4.96} + 31%|███ | 19180/61904 [9:29:29<16:45:23, 1.41s/it] 31%|███ | 19181/61904 [9:29:31<16:27:01, 1.39s/it] 31%|███ | 19182/61904 [9:29:32<15:56:27, 1.34s/it] 31%|███ | 19183/61904 [9:29:34<16:47:07, 1.41s/it] 31%|███ | 19184/61904 [9:29:35<16:42:48, 1.41s/it] 31%|███ | 19185/61904 [9:29:36<16:55:30, 1.43s/it] 31%|███ | 19186/61904 [9:29:38<16:51:11, 1.42s/it] 31%|███ | 19187/61904 [9:29:39<16:24:42, 1.38s/it] 31%|███ | 19188/61904 [9:29:40<16:16:07, 1.37s/it] 31%|███ | 19189/61904 [9:29:42<16:56:59, 1.43s/it] 31%|███ | 19190/61904 [9:29:43<16:33:49, 1.40s/it] 31%|███ | 19191/61904 [9:29:45<16:21:06, 1.38s/it] 31%|███ | 19192/61904 [9:29:46<16:20:15, 1.38s/it] 31%|███ | 19193/61904 [9:29:47<16:26:46, 1.39s/it] 31%|███ | 19194/61904 [9:29:49<16:28:18, 1.39s/it] 31%|███ | 19195/61904 [9:29:50<16:23:23, 1.38s/it] 31%|███ | 19196/61904 [9:29:52<16:18:55, 1.38s/it] 31%|███ | 19197/61904 [9:29:53<16:59:16, 1.43s/it] 31%|███ | 19198/61904 [9:29:54<16:29:57, 1.39s/it] 31%|███ | 19199/61904 [9:29:56<16:27:38, 1.39s/it] 31%|███ | 19200/61904 [9:29:57<16:45:54, 1.41s/it] {'loss': 2.692, 'learning_rate': 1.692078309347854e-07, 'epoch': 4.96} + 31%|███ | 19200/61904 [9:29:57<16:45:54, 1.41s/it] 31%|███ | 19201/61904 [9:29:59<16:23:44, 1.38s/it] 31%|███ | 19202/61904 [9:30:00<16:14:23, 1.37s/it] 31%|███ | 19203/61904 [9:30:01<16:15:58, 1.37s/it] 31%|███ | 19204/61904 [9:30:03<16:00:50, 1.35s/it] 31%|███ | 19205/61904 [9:30:04<15:46:48, 1.33s/it] 31%|███ | 19206/61904 [9:30:05<15:46:27, 1.33s/it] 31%|███ | 19207/61904 [9:30:07<16:26:14, 1.39s/it] 31%|███ | 19208/61904 [9:30:08<16:30:34, 1.39s/it] 31%|███ | 19209/61904 [9:30:10<16:47:27, 1.42s/it] 31%|███ | 19210/61904 [9:30:11<16:57:42, 1.43s/it] 31%|███ | 19211/61904 [9:30:13<17:12:40, 1.45s/it] 31%|███ | 19212/61904 [9:30:14<17:14:49, 1.45s/it] 31%|███ | 19213/61904 [9:30:15<16:41:47, 1.41s/it] 31%|███ | 19214/61904 [9:30:17<16:13:13, 1.37s/it] 31%|███ | 19215/61904 [9:30:18<16:21:46, 1.38s/it] 31%|███ | 19216/61904 [9:30:19<16:43:33, 1.41s/it] 31%|███ | 19217/61904 [9:30:21<17:10:01, 1.45s/it] 31%|███ | 19218/61904 [9:30:23<17:22:37, 1.47s/it] 31%|��██ | 19219/61904 [9:30:24<17:16:14, 1.46s/it] 31%|███ | 19220/61904 [9:30:25<17:10:32, 1.45s/it] {'loss': 2.6762, 'learning_rate': 1.6917541812524307e-07, 'epoch': 4.97} + 31%|███ | 19220/61904 [9:30:25<17:10:32, 1.45s/it] 31%|███ | 19221/61904 [9:30:27<16:37:14, 1.40s/it] 31%|███ | 19222/61904 [9:30:28<16:46:55, 1.42s/it] 31%|███ | 19223/61904 [9:30:30<17:14:29, 1.45s/it] 31%|███ | 19224/61904 [9:30:31<17:14:54, 1.45s/it] 31%|███ | 19225/61904 [9:30:32<16:47:20, 1.42s/it] 31%|███ | 19226/61904 [9:30:34<16:38:43, 1.40s/it] 31%|███ | 19227/61904 [9:30:35<16:16:38, 1.37s/it] 31%|███ | 19228/61904 [9:30:37<16:40:35, 1.41s/it] 31%|███ | 19229/61904 [9:30:38<16:19:17, 1.38s/it] 31%|███ | 19230/61904 [9:30:39<16:46:59, 1.42s/it] 31%|███ | 19231/61904 [9:30:41<16:24:54, 1.38s/it] 31%|███ | 19232/61904 [9:30:42<17:11:05, 1.45s/it] 31%|███ | 19233/61904 [9:30:44<16:53:21, 1.42s/it] 31%|███ | 19234/61904 [9:30:45<16:38:51, 1.40s/it] 31%|███ | 19235/61904 [9:30:46<16:22:30, 1.38s/it] 31%|███ | 19236/61904 [9:30:48<16:19:14, 1.38s/it] 31%|███ | 19237/61904 [9:30:49<16:13:32, 1.37s/it] 31%|███ | 19238/61904 [9:30:50<16:06:10, 1.36s/it] 31%|███ | 19239/61904 [9:30:52<16:22:15, 1.38s/it] 31%|███ | 19240/61904 [9:30:53<16:25:29, 1.39s/it] {'loss': 2.6559, 'learning_rate': 1.6914300531570076e-07, 'epoch': 4.97} + 31%|███ | 19240/61904 [9:30:53<16:25:29, 1.39s/it] 31%|███ | 19241/61904 [9:30:55<15:55:33, 1.34s/it] 31%|███ | 19242/61904 [9:30:56<15:57:34, 1.35s/it] 31%|███ | 19243/61904 [9:30:57<15:58:12, 1.35s/it] 31%|███ | 19244/61904 [9:30:59<15:58:11, 1.35s/it] 31%|███ | 19245/61904 [9:31:00<16:28:09, 1.39s/it] 31%|███ | 19246/61904 [9:31:01<15:56:54, 1.35s/it] 31%|███ | 19247/61904 [9:31:03<16:04:06, 1.36s/it] 31%|███ | 19248/61904 [9:31:04<16:16:24, 1.37s/it] 31%|███ | 19249/61904 [9:31:05<15:57:10, 1.35s/it] 31%|███ | 19250/61904 [9:31:07<17:44:34, 1.50s/it] 31%|███ | 19251/61904 [9:31:09<18:02:09, 1.52s/it] 31%|███ | 19252/61904 [9:31:10<17:03:14, 1.44s/it] 31%|███ | 19253/61904 [9:31:11<16:28:06, 1.39s/it] 31%|███ | 19254/61904 [9:31:13<16:55:32, 1.43s/it] 31%|███ | 19255/61904 [9:31:14<16:46:44, 1.42s/it] 31%|███ | 19256/61904 [9:31:16<17:29:03, 1.48s/it] 31%|███ | 19257/61904 [9:31:17<17:26:06, 1.47s/it] 31%|███ | 19258/61904 [9:31:19<16:55:44, 1.43s/it] 31%|███ | 19259/61904 [9:31:20<16:42:08, 1.41s/it] 31%|███ | 19260/61904 [9:31:21<16:17:51, 1.38s/it] {'loss': 2.6284, 'learning_rate': 1.6911059250615842e-07, 'epoch': 4.98} + 31%|███ | 19260/61904 [9:31:21<16:17:51, 1.38s/it] 31%|███ | 19261/61904 [9:31:23<16:19:23, 1.38s/it] 31%|███ | 19262/61904 [9:31:24<16:46:34, 1.42s/it] 31%|███ | 19263/61904 [9:31:26<16:44:38, 1.41s/it] 31%|███ | 19264/61904 [9:31:27<16:47:48, 1.42s/it] 31%|███ | 19265/61904 [9:31:29<17:01:00, 1.44s/it] 31%|███ | 19266/61904 [9:31:30<16:58:24, 1.43s/it] 31%|███ | 19267/61904 [9:31:31<16:54:26, 1.43s/it] 31%|███ | 19268/61904 [9:31:33<16:20:32, 1.38s/it] 31%|███ | 19269/61904 [9:31:34<16:26:38, 1.39s/it] 31%|███ | 19270/61904 [9:31:35<16:36:24, 1.40s/it] 31%|███ | 19271/61904 [9:31:37<16:34:27, 1.40s/it] 31%|███ | 19272/61904 [9:31:38<16:21:42, 1.38s/it] 31%|███ | 19273/61904 [9:31:39<15:51:33, 1.34s/it] 31%|███ | 19274/61904 [9:31:41<16:04:46, 1.36s/it] 31%|███ | 19275/61904 [9:31:42<16:33:41, 1.40s/it] 31%|███ | 19276/61904 [9:31:44<16:02:50, 1.36s/it] 31%|███ | 19277/61904 [9:31:45<15:54:08, 1.34s/it] 31%|███ | 19278/61904 [9:31:46<15:36:32, 1.32s/it] 31%|███ | 19279/61904 [9:31:48<15:41:31, 1.33s/it] 31%|███ | 19280/61904 [9:31:49<15:48:33, 1.34s/it] {'loss': 2.6631, 'learning_rate': 1.6907817969661608e-07, 'epoch': 4.98} + 31%|███ | 19280/61904 [9:31:49<15:48:33, 1.34s/it] 31%|███ | 19281/61904 [9:31:50<15:42:33, 1.33s/it] 31%|███ | 19282/61904 [9:31:52<15:51:47, 1.34s/it] 31%|███ | 19283/61904 [9:31:53<15:46:31, 1.33s/it] 31%|███ | 19284/61904 [9:31:54<16:00:44, 1.35s/it] 31%|███ | 19285/61904 [9:31:56<16:12:18, 1.37s/it] 31%|███ | 19286/61904 [9:31:57<16:31:19, 1.40s/it] 31%|███ | 19287/61904 [9:31:59<16:36:30, 1.40s/it] 31%|███ | 19288/61904 [9:32:00<16:07:36, 1.36s/it] 31%|███ | 19289/61904 [9:32:01<16:27:15, 1.39s/it] 31%|███ | 19290/61904 [9:32:03<16:33:33, 1.40s/it] 31%|███ | 19291/61904 [9:32:04<17:12:46, 1.45s/it] 31%|███ | 19292/61904 [9:32:06<16:49:01, 1.42s/it] 31%|███ | 19293/61904 [9:32:07<16:34:29, 1.40s/it] 31%|███ | 19294/61904 [9:32:08<16:02:53, 1.36s/it] 31%|███ | 19295/61904 [9:32:10<16:15:08, 1.37s/it] 31%|███ | 19296/61904 [9:32:11<15:51:13, 1.34s/it] 31%|███ | 19297/61904 [9:32:12<16:20:41, 1.38s/it] 31%|███ | 19298/61904 [9:32:14<16:47:34, 1.42s/it] 31%|███ | 19299/61904 [9:32:15<16:17:14, 1.38s/it] 31%|███ | 19300/61904 [9:32:16<16:01:08, 1.35s/it] {'loss': 2.6698, 'learning_rate': 1.6904576688707377e-07, 'epoch': 4.99} + 31%|███ | 19300/61904 [9:32:16<16:01:08, 1.35s/it] 31%|███ | 19301/61904 [9:32:18<16:40:39, 1.41s/it] 31%|███ | 19302/61904 [9:32:19<16:36:31, 1.40s/it] 31%|███ | 19303/61904 [9:32:21<16:05:43, 1.36s/it] 31%|███ | 19304/61904 [9:32:22<16:02:45, 1.36s/it] 31%|███ | 19305/61904 [9:32:23<16:00:34, 1.35s/it] 31%|███ | 19306/61904 [9:32:25<16:13:27, 1.37s/it] 31%|███ | 19307/61904 [9:32:26<16:35:23, 1.40s/it] 31%|███ | 19308/61904 [9:32:28<16:48:33, 1.42s/it] 31%|███ | 19309/61904 [9:32:29<16:17:39, 1.38s/it] 31%|███ | 19310/61904 [9:32:30<15:55:21, 1.35s/it] 31%|███ | 19311/61904 [9:32:32<16:37:50, 1.41s/it] 31%|███ | 19312/61904 [9:32:33<16:21:09, 1.38s/it] 31%|███ | 19313/61904 [9:32:35<16:23:22, 1.39s/it] 31%|███ | 19314/61904 [9:32:36<16:34:40, 1.40s/it] 31%|███ | 19315/61904 [9:32:37<16:46:03, 1.42s/it] 31%|███ | 19316/61904 [9:32:39<16:55:48, 1.43s/it] 31%|███ | 19317/61904 [9:32:40<17:08:01, 1.45s/it] 31%|███ | 19318/61904 [9:32:42<17:03:24, 1.44s/it] 31%|███ | 19319/61904 [9:32:43<17:27:04, 1.48s/it] 31%|███ | 19320/61904 [9:32:45<17:37:36, 1.49s/it] {'loss': 2.6776, 'learning_rate': 1.6901335407753143e-07, 'epoch': 4.99} + 31%|███ | 19320/61904 [9:32:45<17:37:36, 1.49s/it] 31%|███ | 19321/61904 [9:32:46<17:04:52, 1.44s/it] 31%|███ | 19322/61904 [9:32:48<16:49:31, 1.42s/it] 31%|███ | 19323/61904 [9:32:49<16:48:32, 1.42s/it] 31%|███ | 19324/61904 [9:32:50<16:39:22, 1.41s/it] 31%|███ | 19325/61904 [9:32:52<16:21:16, 1.38s/it] 31%|███ | 19326/61904 [9:32:53<16:32:46, 1.40s/it] 31%|███ | 19327/61904 [9:32:54<16:24:11, 1.39s/it] 31%|███ | 19328/61904 [9:32:56<16:26:48, 1.39s/it] 31%|███ | 19329/61904 [9:32:57<16:29:17, 1.39s/it] 31%|███ | 19330/61904 [9:32:59<15:58:01, 1.35s/it] 31%|███ | 19331/61904 [9:33:00<15:52:43, 1.34s/it] 31%|███ | 19332/61904 [9:33:01<16:07:01, 1.36s/it] 31%|███ | 19333/61904 [9:33:03<16:14:44, 1.37s/it] 31%|███ | 19334/61904 [9:33:04<16:13:24, 1.37s/it] 31%|███ | 19335/61904 [9:33:05<16:14:35, 1.37s/it] 31%|███ | 19336/61904 [9:33:07<16:30:49, 1.40s/it] 31%|███ | 19337/61904 [9:33:08<16:34:05, 1.40s/it] 31%|███ | 19338/61904 [9:33:09<15:57:47, 1.35s/it] 31%|███ | 19339/61904 [9:33:11<16:09:40, 1.37s/it] 31%|███ | 19340/61904 [9:33:12<16:42:13, 1.41s/it] {'loss': 2.7116, 'learning_rate': 1.689809412679891e-07, 'epoch': 5.0} + 31%|███ | 19340/61904 [9:33:12<16:42:13, 1.41s/it] 31%|███ | 19341/61904 [9:33:14<16:57:26, 1.43s/it] 31%|███ | 19342/61904 [9:33:15<16:11:23, 1.37s/it] 31%|███ | 19343/61904 [9:33:17<16:27:11, 1.39s/it] 31%|███ | 19344/61904 [9:33:18<16:15:10, 1.37s/it] 31%|███▏ | 19345/61904 [9:33:19<15:47:52, 1.34s/it] 31%|███▏ | 19346/61904 [9:33:21<16:07:59, 1.36s/it] 31%|███▏ | 19347/61904 [9:33:22<16:05:55, 1.36s/it]Generation Kwargs: +{'max_length': 384, 'max_gen_length': 380, 'num_beams': 5} + + 0%| | 0/861 [00:00> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 31%|███▏ | 19348/61904 [10:01:55<6084:59:50, 514.76s/it] 31%|███▏ | 19349/61904 [10:01:56<4265:00:52, 360.80s/it] 31%|███▏ | 19350/61904 [10:01:58<2990:50:56, 253.02s/it] 31%|███▏ | 19351/61904 [10:01:59<2099:02:22, 177.58s/it] 31%|███▏ | 19352/61904 [10:02:01<1474:40:32, 124.76s/it] 31%|███▏ | 19353/61904 [10:02:02<1037:24:39, 87.77s/it] 31%|███▏ | 19354/61904 [10:02:04<731:20:17, 61.88s/it] 31%|███▏ | 19355/61904 [10:02:05<516:59:58, 43.74s/it] 31%|███▏ | 19356/61904 [10:02:07<367:12:02, 31.07s/it] 31%|███▏ | 19357/61904 [10:02:08<262:09:35, 22.18s/it] 31%|███▏ | 19358/61904 [10:02:09<188:28:04, 15.95s/it] 31%|███▏ | 19359/61904 [10:02:11<136:27:42, 11.55s/it] 31%|███▏ | 19360/61904 [10:02:12<101:03:27, 8.55s/it] {'loss': 2.6478, 'learning_rate': 1.6894852845844678e-07, 'epoch': 5.0} + 31%|███▏ | 19360/61904 [10:02:12<101:03:27, 8.55s/it] 31%|███▏ | 19361/61904 [10:02:14<75:48:39, 6.42s/it] 31%|███▏ | 19362/61904 [10:02:15<58:18:49, 4.93s/it] 31%|███▏ | 19363/61904 [10:02:17<45:48:05, 3.88s/it] 31%|███▏ | 19364/61904 [10:02:18<37:14:27, 3.15s/it] 31%|███▏ | 19365/61904 [10:02:20<31:45:02, 2.69s/it] 31%|███▏ | 19366/61904 [10:02:21<27:24:31, 2.32s/it] 31%|███▏ | 19367/61904 [10:02:23<24:29:26, 2.07s/it] 31%|███▏ | 19368/61904 [10:02:24<22:34:30, 1.91s/it] 31%|███▏ | 19369/61904 [10:02:26<20:39:51, 1.75s/it] 31%|███▏ | 19370/61904 [10:02:27<19:20:59, 1.64s/it] 31%|███▏ | 19371/61904 [10:02:28<18:21:40, 1.55s/it] 31%|███▏ | 19372/61904 [10:02:30<18:21:43, 1.55s/it] 31%|███▏ | 19373/61904 [10:02:31<18:22:22, 1.56s/it] 31%|███▏ | 19374/61904 [10:02:33<18:21:14, 1.55s/it] 31%|███▏ | 19375/61904 [10:02:34<17:36:06, 1.49s/it] 31%|███▏ | 19376/61904 [10:02:36<17:15:32, 1.46s/it] 31%|███▏ | 19377/61904 [10:02:37<16:49:32, 1.42s/it] 31%|███▏ | 19378/61904 [10:02:38<16:11:26, 1.37s/it] 31%|███▏ | 19379/61904 [10:02:40<17:13:14, 1.46s/it] \ No newline at end of file