diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -52876,3 +52876,7472 @@ Use FP16 precision: False 03/05/2022 06:35:42 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) 03/05/2022 06:35:47 - INFO - codeparrot_training - Step 34999: {'lr': 0.0004411349869417247, 'samples': 17920000, 'steps': 34999, 'loss/train': 1.3265409469604492} 03/05/2022 06:35:47 - INFO - codeparrot_training - Evaluating and saving model checkpoint +03/05/2022 06:36:01 - WARNING - huggingface_hub.repository - Several commits (7) will be pushed upstream. +03/05/2022 06:36:01 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +03/05/2022 06:36:26 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/cm_code_clippy + 8908229..eeb78f1 glowing-puddle-3 -> glowing-puddle-3 + +03/05/2022 06:36:31 - INFO - codeparrot_training - Step 35000: {'lr': 0.00044113156629677313, 'samples': 17920512, 'steps': 35000, 'loss/train': 2.0515060424804688} +03/05/2022 06:36:32 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 06:36:37 - INFO - codeparrot_training - Step 35001: {'lr': 0.00044112814556570066, 'samples': 17921024, 'steps': 35001, 'loss/train': 1.841289758682251} +03/05/2022 06:36:40 - INFO - codeparrot_training - Step 35002: {'lr': 0.00044112472474850875, 'samples': 17921536, 'steps': 35002, 'loss/train': 1.5218960046768188} +03/05/2022 06:36:40 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/05/2022 06:36:45 - INFO - codeparrot_training - Step 35003: {'lr': 0.000441121303845199, 'samples': 17922048, 'steps': 35003, 'loss/train': 2.270785331726074} +03/05/2022 06:36:48 - INFO - codeparrot_training - Step 35004: {'lr': 0.0004411178828557729, 'samples': 17922560, 'steps': 35004, 'loss/train': 1.8769875764846802} +03/05/2022 06:36:49 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 06:36:53 - INFO - codeparrot_training - Step 35005: {'lr': 0.00044111446178023205, 'samples': 17923072, 'steps': 35005, 'loss/train': 1.5740808248519897} +03/05/2022 06:36:57 - INFO - codeparrot_training - Step 35006: {'lr': 0.000441111040618578, 'samples': 17923584, 'steps': 35006, 'loss/train': 1.4152005910873413} +03/05/2022 06:36:57 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/05/2022 06:37:02 - INFO - codeparrot_training - Step 35007: {'lr': 0.0004411076193708122, 'samples': 17924096, 'steps': 35007, 'loss/train': 2.161975622177124} +03/05/2022 06:37:05 - INFO - codeparrot_training - Step 35008: {'lr': 0.00044110419803693635, 'samples': 17924608, 'steps': 35008, 'loss/train': 1.4361581802368164} +03/05/2022 06:37:05 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 06:37:10 - INFO - codeparrot_training - Step 35009: {'lr': 0.00044110077661695194, 'samples': 17925120, 'steps': 35009, 'loss/train': 1.1023637056350708} +03/05/2022 06:37:13 - INFO - codeparrot_training - Step 35010: {'lr': 0.00044109735511086036, 'samples': 17925632, 'steps': 35010, 'loss/train': 0.7857583165168762} +03/05/2022 06:37:14 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/05/2022 06:37:19 - INFO - codeparrot_training - Step 35011: {'lr': 0.00044109393351866324, 'samples': 17926144, 'steps': 35011, 'loss/train': 1.357399344444275} +03/05/2022 06:37:22 - INFO - codeparrot_training - Step 35012: {'lr': 0.0004410905118403622, 'samples': 17926656, 'steps': 35012, 'loss/train': 1.7133471965789795} +03/05/2022 06:37:22 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 06:37:27 - INFO - codeparrot_training - Step 35013: {'lr': 0.0004410870900759587, 'samples': 17927168, 'steps': 35013, 'loss/train': 1.2567002773284912} +03/05/2022 06:37:30 - INFO - codeparrot_training - Step 35014: {'lr': 0.0004410836682254543, 'samples': 17927680, 'steps': 35014, 'loss/train': 1.4750494956970215} +03/05/2022 06:37:30 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/05/2022 06:37:36 - INFO - codeparrot_training - Step 35015: {'lr': 0.0004410802462888506, 'samples': 17928192, 'steps': 35015, 'loss/train': 1.8269269466400146} +03/05/2022 06:37:39 - INFO - codeparrot_training - Step 35016: {'lr': 0.00044107682426614903, 'samples': 17928704, 'steps': 35016, 'loss/train': 1.7890626192092896} +03/05/2022 06:37:39 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/05/2022 06:37:44 - INFO - codeparrot_training - Step 35017: {'lr': 0.00044107340215735125, 'samples': 17929216, 'steps': 35017, 'loss/train': 1.8668313026428223} +03/05/2022 06:37:47 - INFO - codeparrot_training - Step 35018: {'lr': 0.00044106997996245866, 'samples': 17929728, 'steps': 35018, 'loss/train': 0.7927989363670349} +03/05/2022 06:37:48 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/05/2022 06:37:53 - INFO - codeparrot_training - Step 35019: {'lr': 0.000441066557681473, 'samples': 17930240, 'steps': 35019, 'loss/train': 1.561155915260315} +03/05/2022 06:37:56 - INFO - codeparrot_training - Step 35020: {'lr': 0.00044106313531439565, 'samples': 17930752, 'steps': 35020, 'loss/train': 1.696244716644287} +03/05/2022 06:37:56 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/05/2022 06:38:01 - INFO - codeparrot_training - Step 35021: {'lr': 0.00044105971286122816, 'samples': 17931264, 'steps': 35021, 'loss/train': 1.4708157777786255} +03/05/2022 06:38:04 - INFO - codeparrot_training - Step 35022: {'lr': 0.00044105629032197214, 'samples': 17931776, 'steps': 35022, 'loss/train': 1.0895287990570068} +03/05/2022 06:38:04 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/05/2022 06:38:09 - INFO - codeparrot_training - Step 35023: {'lr': 0.0004410528676966291, 'samples': 17932288, 'steps': 35023, 'loss/train': 2.0822582244873047} +03/05/2022 06:38:12 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 06:38:15 - INFO - codeparrot_training - Step 35024: {'lr': 0.00044104944498520054, 'samples': 17932800, 'steps': 35024, 'loss/train': 2.231043577194214} +03/05/2022 06:38:18 - INFO - codeparrot_training - Step 35025: {'lr': 0.00044104602218768805, 'samples': 17933312, 'steps': 35025, 'loss/train': 0.7925736904144287} +03/05/2022 06:38:21 - INFO - codeparrot_training - Step 35026: {'lr': 0.0004410425993040933, 'samples': 17933824, 'steps': 35026, 'loss/train': 0.7658699750900269} +03/05/2022 06:38:21 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/05/2022 06:38:26 - INFO - codeparrot_training - Step 35027: {'lr': 0.0004410391763344176, 'samples': 17934336, 'steps': 35027, 'loss/train': 1.8230595588684082} +03/05/2022 06:38:29 - INFO - codeparrot_training - Step 35028: {'lr': 0.00044103575327866264, 'samples': 17934848, 'steps': 35028, 'loss/train': 0.8513350486755371} +03/05/2022 06:38:30 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/05/2022 06:38:35 - INFO - codeparrot_training - Step 35029: {'lr': 0.0004410323301368299, 'samples': 17935360, 'steps': 35029, 'loss/train': 1.2549076080322266} +03/05/2022 06:38:38 - INFO - codeparrot_training - Step 35030: {'lr': 0.0004410289069089209, 'samples': 17935872, 'steps': 35030, 'loss/train': 0.9950276017189026} +03/05/2022 06:38:38 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/05/2022 06:38:43 - INFO - codeparrot_training - Step 35031: {'lr': 0.0004410254835949372, 'samples': 17936384, 'steps': 35031, 'loss/train': 1.5624009370803833} +03/05/2022 06:38:46 - INFO - codeparrot_training - Step 35032: {'lr': 0.00044102206019488045, 'samples': 17936896, 'steps': 35032, 'loss/train': 1.736935019493103} +03/05/2022 06:38:46 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/05/2022 06:38:51 - INFO - codeparrot_training - Step 35033: {'lr': 0.00044101863670875207, 'samples': 17937408, 'steps': 35033, 'loss/train': 1.5070401430130005} +03/05/2022 06:38:55 - INFO - codeparrot_training - Step 35034: {'lr': 0.0004410152131365536, 'samples': 17937920, 'steps': 35034, 'loss/train': 1.9382750988006592} +03/05/2022 06:38:55 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/05/2022 06:39:00 - INFO - codeparrot_training - Step 35035: {'lr': 0.00044101178947828667, 'samples': 17938432, 'steps': 35035, 'loss/train': 1.5708887577056885} +03/05/2022 06:39:03 - INFO - codeparrot_training - Step 35036: {'lr': 0.0004410083657339528, 'samples': 17938944, 'steps': 35036, 'loss/train': 0.526353657245636} +03/05/2022 06:39:03 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/05/2022 06:39:09 - INFO - codeparrot_training - Step 35037: {'lr': 0.00044100494190355347, 'samples': 17939456, 'steps': 35037, 'loss/train': 1.95433509349823} +03/05/2022 06:39:12 - INFO - codeparrot_training - Step 35038: {'lr': 0.0004410015179870903, 'samples': 17939968, 'steps': 35038, 'loss/train': 2.5283753871917725} +03/05/2022 06:39:12 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/05/2022 06:39:17 - INFO - codeparrot_training - Step 35039: {'lr': 0.0004409980939845647, 'samples': 17940480, 'steps': 35039, 'loss/train': 0.9857442378997803} +03/05/2022 06:39:20 - INFO - codeparrot_training - Step 35040: {'lr': 0.00044099466989597837, 'samples': 17940992, 'steps': 35040, 'loss/train': 1.505144715309143} +03/05/2022 06:39:20 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/05/2022 06:39:25 - INFO - codeparrot_training - Step 35041: {'lr': 0.00044099124572133283, 'samples': 17941504, 'steps': 35041, 'loss/train': 0.5281742215156555} +03/05/2022 06:39:29 - INFO - codeparrot_training - Step 35042: {'lr': 0.00044098782146062955, 'samples': 17942016, 'steps': 35042, 'loss/train': 2.0821478366851807} +03/05/2022 06:39:29 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 06:39:34 - INFO - codeparrot_training - Step 35043: {'lr': 0.00044098439711387006, 'samples': 17942528, 'steps': 35043, 'loss/train': 1.319618821144104} +03/05/2022 06:39:37 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/05/2022 06:39:39 - INFO - codeparrot_training - Step 35044: {'lr': 0.000440980972681056, 'samples': 17943040, 'steps': 35044, 'loss/train': 1.1478744745254517} +03/05/2022 06:39:42 - INFO - codeparrot_training - Step 35045: {'lr': 0.0004409775481621888, 'samples': 17943552, 'steps': 35045, 'loss/train': 2.1830549240112305} +03/05/2022 06:39:46 - INFO - codeparrot_training - Step 35046: {'lr': 0.0004409741235572701, 'samples': 17944064, 'steps': 35046, 'loss/train': 1.6463422775268555} +03/05/2022 06:39:47 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 06:39:51 - INFO - codeparrot_training - Step 35047: {'lr': 0.0004409706988663015, 'samples': 17944576, 'steps': 35047, 'loss/train': 1.2908602952957153} +03/05/2022 06:39:54 - INFO - codeparrot_training - Step 35048: {'lr': 0.00044096727408928426, 'samples': 17945088, 'steps': 35048, 'loss/train': 2.0445375442504883} +03/05/2022 06:39:55 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 06:40:00 - INFO - codeparrot_training - Step 35049: {'lr': 0.0004409638492262202, 'samples': 17945600, 'steps': 35049, 'loss/train': 0.5649662017822266} +03/05/2022 06:40:03 - INFO - codeparrot_training - Step 35050: {'lr': 0.0004409604242771108, 'samples': 17946112, 'steps': 35050, 'loss/train': 0.9336749911308289} +03/05/2022 06:40:05 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/05/2022 06:40:08 - INFO - codeparrot_training - Step 35051: {'lr': 0.0004409569992419576, 'samples': 17946624, 'steps': 35051, 'loss/train': 1.5632200241088867} +03/05/2022 06:40:11 - INFO - codeparrot_training - Step 35052: {'lr': 0.0004409535741207621, 'samples': 17947136, 'steps': 35052, 'loss/train': 1.0504883527755737} +03/05/2022 06:40:14 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 06:40:17 - INFO - codeparrot_training - Step 35053: {'lr': 0.00044095014891352584, 'samples': 17947648, 'steps': 35053, 'loss/train': 1.9159879684448242} +03/05/2022 06:40:20 - INFO - codeparrot_training - Step 35054: {'lr': 0.0004409467236202505, 'samples': 17948160, 'steps': 35054, 'loss/train': 2.7451696395874023} +03/05/2022 06:40:23 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/05/2022 06:40:25 - INFO - codeparrot_training - Step 35055: {'lr': 0.0004409432982409374, 'samples': 17948672, 'steps': 35055, 'loss/train': 1.2476372718811035} +03/05/2022 06:40:28 - INFO - codeparrot_training - Step 35056: {'lr': 0.0004409398727755882, 'samples': 17949184, 'steps': 35056, 'loss/train': 1.654170036315918} +03/05/2022 06:40:31 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 06:40:34 - INFO - codeparrot_training - Step 35057: {'lr': 0.00044093644722420445, 'samples': 17949696, 'steps': 35057, 'loss/train': 1.350480079650879} +03/05/2022 06:40:37 - INFO - codeparrot_training - Step 35058: {'lr': 0.00044093302158678766, 'samples': 17950208, 'steps': 35058, 'loss/train': 2.369255542755127} +03/05/2022 06:40:40 - INFO - codeparrot_training - Step 35059: {'lr': 0.0004409295958633394, 'samples': 17950720, 'steps': 35059, 'loss/train': 1.305041790008545} +03/05/2022 06:40:40 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 06:40:45 - INFO - codeparrot_training - Step 35060: {'lr': 0.00044092617005386125, 'samples': 17951232, 'steps': 35060, 'loss/train': 0.16362528502941132} +03/05/2022 06:40:48 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/05/2022 06:40:51 - INFO - codeparrot_training - Step 35061: {'lr': 0.00044092274415835473, 'samples': 17951744, 'steps': 35061, 'loss/train': 2.0974016189575195} +03/05/2022 06:40:54 - INFO - codeparrot_training - Step 35062: {'lr': 0.0004409193181768213, 'samples': 17952256, 'steps': 35062, 'loss/train': 1.3911359310150146} +03/05/2022 06:40:57 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 06:40:59 - INFO - codeparrot_training - Step 35063: {'lr': 0.00044091589210926266, 'samples': 17952768, 'steps': 35063, 'loss/train': 1.4589629173278809} +03/05/2022 06:41:03 - INFO - codeparrot_training - Step 35064: {'lr': 0.00044091246595568025, 'samples': 17953280, 'steps': 35064, 'loss/train': 1.43472421169281} +03/05/2022 06:41:05 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/05/2022 06:41:08 - INFO - codeparrot_training - Step 35065: {'lr': 0.00044090903971607555, 'samples': 17953792, 'steps': 35065, 'loss/train': 1.3697776794433594} +03/05/2022 06:41:11 - INFO - codeparrot_training - Step 35066: {'lr': 0.0004409056133904502, 'samples': 17954304, 'steps': 35066, 'loss/train': 1.565011978149414} +03/05/2022 06:41:14 - INFO - codeparrot_training - Step 35067: {'lr': 0.00044090218697880577, 'samples': 17954816, 'steps': 35067, 'loss/train': 2.194840669631958} +03/05/2022 06:41:15 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 06:41:20 - INFO - codeparrot_training - Step 35068: {'lr': 0.0004408987604811437, 'samples': 17955328, 'steps': 35068, 'loss/train': 1.7123676538467407} +03/05/2022 06:41:23 - INFO - codeparrot_training - Step 35069: {'lr': 0.00044089533389746573, 'samples': 17955840, 'steps': 35069, 'loss/train': 1.6059235334396362} +03/05/2022 06:41:23 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 06:41:28 - INFO - codeparrot_training - Step 35070: {'lr': 0.00044089190722777316, 'samples': 17956352, 'steps': 35070, 'loss/train': 0.9541943669319153} +03/05/2022 06:41:31 - INFO - codeparrot_training - Step 35071: {'lr': 0.00044088848047206763, 'samples': 17956864, 'steps': 35071, 'loss/train': 1.0749815702438354} +03/05/2022 06:41:32 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 06:41:36 - INFO - codeparrot_training - Step 35072: {'lr': 0.0004408850536303507, 'samples': 17957376, 'steps': 35072, 'loss/train': 0.786578357219696} +03/05/2022 06:41:40 - INFO - codeparrot_training - Step 35073: {'lr': 0.000440881626702624, 'samples': 17957888, 'steps': 35073, 'loss/train': 1.417797565460205} +03/05/2022 06:41:40 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/05/2022 06:41:45 - INFO - codeparrot_training - Step 35074: {'lr': 0.00044087819968888887, 'samples': 17958400, 'steps': 35074, 'loss/train': 1.875423550605774} +03/05/2022 06:41:48 - INFO - codeparrot_training - Step 35075: {'lr': 0.00044087477258914696, 'samples': 17958912, 'steps': 35075, 'loss/train': 1.9950605630874634} +03/05/2022 06:41:48 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 06:41:53 - INFO - codeparrot_training - Step 35076: {'lr': 0.00044087134540339996, 'samples': 17959424, 'steps': 35076, 'loss/train': 1.7342673540115356} +03/05/2022 06:41:56 - INFO - codeparrot_training - Step 35077: {'lr': 0.00044086791813164916, 'samples': 17959936, 'steps': 35077, 'loss/train': 1.705446720123291} +03/05/2022 06:41:56 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 06:42:02 - INFO - codeparrot_training - Step 35078: {'lr': 0.00044086449077389636, 'samples': 17960448, 'steps': 35078, 'loss/train': 0.1058739721775055} +03/05/2022 06:42:05 - INFO - codeparrot_training - Step 35079: {'lr': 0.0004408610633301428, 'samples': 17960960, 'steps': 35079, 'loss/train': 1.584465742111206} +03/05/2022 06:42:10 - INFO - codeparrot_training - Step 35080: {'lr': 0.00044085763580039027, 'samples': 17961472, 'steps': 35080, 'loss/train': 1.4333089590072632} +03/05/2022 06:42:13 - INFO - codeparrot_training - Step 35081: {'lr': 0.0004408542081846402, 'samples': 17961984, 'steps': 35081, 'loss/train': 1.781825304031372} +03/05/2022 06:42:13 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 06:42:19 - INFO - codeparrot_training - Step 35082: {'lr': 0.0004408507804828942, 'samples': 17962496, 'steps': 35082, 'loss/train': 1.1559191942214966} +03/05/2022 06:42:22 - INFO - codeparrot_training - Step 35083: {'lr': 0.00044084735269515375, 'samples': 17963008, 'steps': 35083, 'loss/train': 1.3996740579605103} +03/05/2022 06:42:22 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/05/2022 06:42:27 - INFO - codeparrot_training - Step 35084: {'lr': 0.0004408439248214205, 'samples': 17963520, 'steps': 35084, 'loss/train': 1.9162170886993408} +03/05/2022 06:42:30 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 06:42:32 - INFO - codeparrot_training - Step 35085: {'lr': 0.00044084049686169584, 'samples': 17964032, 'steps': 35085, 'loss/train': 1.9303969144821167} +03/05/2022 06:42:36 - INFO - codeparrot_training - Step 35086: {'lr': 0.00044083706881598147, 'samples': 17964544, 'steps': 35086, 'loss/train': 1.463670253753662} +03/05/2022 06:42:38 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 06:42:41 - INFO - codeparrot_training - Step 35087: {'lr': 0.00044083364068427875, 'samples': 17965056, 'steps': 35087, 'loss/train': 2.4930057525634766} +03/05/2022 06:42:44 - INFO - codeparrot_training - Step 35088: {'lr': 0.0004408302124665894, 'samples': 17965568, 'steps': 35088, 'loss/train': 1.467700481414795} +03/05/2022 06:42:47 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 06:42:49 - INFO - codeparrot_training - Step 35089: {'lr': 0.00044082678416291495, 'samples': 17966080, 'steps': 35089, 'loss/train': 0.14983738958835602} +03/05/2022 06:42:53 - INFO - codeparrot_training - Step 35090: {'lr': 0.00044082335577325685, 'samples': 17966592, 'steps': 35090, 'loss/train': 1.2743892669677734} +03/05/2022 06:42:55 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/05/2022 06:42:58 - INFO - codeparrot_training - Step 35091: {'lr': 0.0004408199272976167, 'samples': 17967104, 'steps': 35091, 'loss/train': 1.7178865671157837} +03/05/2022 06:43:01 - INFO - codeparrot_training - Step 35092: {'lr': 0.00044081649873599604, 'samples': 17967616, 'steps': 35092, 'loss/train': 1.4637819528579712} +03/05/2022 06:43:03 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/05/2022 06:43:06 - INFO - codeparrot_training - Step 35093: {'lr': 0.0004408130700883964, 'samples': 17968128, 'steps': 35093, 'loss/train': 1.6614797115325928} +03/05/2022 06:43:09 - INFO - codeparrot_training - Step 35094: {'lr': 0.0004408096413548193, 'samples': 17968640, 'steps': 35094, 'loss/train': 2.067784309387207} +03/05/2022 06:43:11 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 06:43:15 - INFO - codeparrot_training - Step 35095: {'lr': 0.00044080621253526637, 'samples': 17969152, 'steps': 35095, 'loss/train': 1.471561074256897} +03/05/2022 06:43:18 - INFO - codeparrot_training - Step 35096: {'lr': 0.00044080278362973913, 'samples': 17969664, 'steps': 35096, 'loss/train': 1.6416233777999878} +03/05/2022 06:43:20 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 06:43:23 - INFO - codeparrot_training - Step 35097: {'lr': 0.00044079935463823904, 'samples': 17970176, 'steps': 35097, 'loss/train': 1.6243948936462402} +03/05/2022 06:43:26 - INFO - codeparrot_training - Step 35098: {'lr': 0.00044079592556076774, 'samples': 17970688, 'steps': 35098, 'loss/train': 0.5875405669212341} +03/05/2022 06:43:30 - INFO - codeparrot_training - Step 35099: {'lr': 0.00044079249639732664, 'samples': 17971200, 'steps': 35099, 'loss/train': 1.6715366840362549} +03/05/2022 06:43:30 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 06:43:35 - INFO - codeparrot_training - Step 35100: {'lr': 0.00044078906714791757, 'samples': 17971712, 'steps': 35100, 'loss/train': 2.206998109817505} +03/05/2022 06:43:38 - INFO - codeparrot_training - Step 35101: {'lr': 0.0004407856378125418, 'samples': 17972224, 'steps': 35101, 'loss/train': 2.1837384700775146} +03/05/2022 06:43:38 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 06:43:44 - INFO - codeparrot_training - Step 35102: {'lr': 0.00044078220839120086, 'samples': 17972736, 'steps': 35102, 'loss/train': 1.177858591079712} +03/05/2022 06:43:46 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 06:43:49 - INFO - codeparrot_training - Step 35103: {'lr': 0.0004407787788838966, 'samples': 17973248, 'steps': 35103, 'loss/train': 1.4679001569747925} +03/05/2022 06:43:52 - INFO - codeparrot_training - Step 35104: {'lr': 0.00044077534929063024, 'samples': 17973760, 'steps': 35104, 'loss/train': 1.8608607053756714} +03/05/2022 06:43:55 - INFO - codeparrot_training - Step 35105: {'lr': 0.00044077191961140337, 'samples': 17974272, 'steps': 35105, 'loss/train': 1.519514799118042} +03/05/2022 06:43:55 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/05/2022 06:44:01 - INFO - codeparrot_training - Step 35106: {'lr': 0.00044076848984621775, 'samples': 17974784, 'steps': 35106, 'loss/train': 1.794328212738037} +03/05/2022 06:44:04 - INFO - codeparrot_training - Step 35107: {'lr': 0.00044076505999507474, 'samples': 17975296, 'steps': 35107, 'loss/train': 1.7238179445266724} +03/05/2022 06:44:04 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 06:44:09 - INFO - codeparrot_training - Step 35108: {'lr': 0.00044076163005797597, 'samples': 17975808, 'steps': 35108, 'loss/train': 1.834072470664978} +03/05/2022 06:44:12 - INFO - codeparrot_training - Step 35109: {'lr': 0.00044075820003492295, 'samples': 17976320, 'steps': 35109, 'loss/train': 1.9181102514266968} +03/05/2022 06:44:12 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/05/2022 06:44:17 - INFO - codeparrot_training - Step 35110: {'lr': 0.0004407547699259173, 'samples': 17976832, 'steps': 35110, 'loss/train': 3.2535603046417236} +03/05/2022 06:44:21 - INFO - codeparrot_training - Step 35111: {'lr': 0.0004407513397309604, 'samples': 17977344, 'steps': 35111, 'loss/train': 1.0164982080459595} +03/05/2022 06:44:21 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 06:44:26 - INFO - codeparrot_training - Step 35112: {'lr': 0.0004407479094500539, 'samples': 17977856, 'steps': 35112, 'loss/train': 1.3741482496261597} +03/05/2022 06:44:29 - INFO - codeparrot_training - Step 35113: {'lr': 0.00044074447908319935, 'samples': 17978368, 'steps': 35113, 'loss/train': 1.7950351238250732} +03/05/2022 06:44:29 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/05/2022 06:44:34 - INFO - codeparrot_training - Step 35114: {'lr': 0.0004407410486303983, 'samples': 17978880, 'steps': 35114, 'loss/train': 1.2568387985229492} +03/05/2022 06:44:37 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 06:44:40 - INFO - codeparrot_training - Step 35115: {'lr': 0.0004407376180916522, 'samples': 17979392, 'steps': 35115, 'loss/train': 1.7677689790725708} +03/05/2022 06:44:43 - INFO - codeparrot_training - Step 35116: {'lr': 0.0004407341874669627, 'samples': 17979904, 'steps': 35116, 'loss/train': 1.66059148311615} +03/05/2022 06:44:46 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/05/2022 06:44:48 - INFO - codeparrot_training - Step 35117: {'lr': 0.00044073075675633134, 'samples': 17980416, 'steps': 35117, 'loss/train': 1.6717792749404907} +03/05/2022 06:44:51 - INFO - codeparrot_training - Step 35118: {'lr': 0.0004407273259597597, 'samples': 17980928, 'steps': 35118, 'loss/train': 2.05826473236084} +03/05/2022 06:44:54 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 06:44:57 - INFO - codeparrot_training - Step 35119: {'lr': 0.0004407238950772492, 'samples': 17981440, 'steps': 35119, 'loss/train': 1.5202373266220093} +03/05/2022 06:45:00 - INFO - codeparrot_training - Step 35120: {'lr': 0.00044072046410880143, 'samples': 17981952, 'steps': 35120, 'loss/train': 1.5986127853393555} +03/05/2022 06:45:02 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 06:45:05 - INFO - codeparrot_training - Step 35121: {'lr': 0.000440717033054418, 'samples': 17982464, 'steps': 35121, 'loss/train': 1.7005702257156372} +03/05/2022 06:45:08 - INFO - codeparrot_training - Step 35122: {'lr': 0.0004407136019141005, 'samples': 17982976, 'steps': 35122, 'loss/train': 0.833538293838501} +03/05/2022 06:45:11 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 06:45:13 - INFO - codeparrot_training - Step 35123: {'lr': 0.0004407101706878502, 'samples': 17983488, 'steps': 35123, 'loss/train': 1.7292698621749878} +03/05/2022 06:45:17 - INFO - codeparrot_training - Step 35124: {'lr': 0.000440706739375669, 'samples': 17984000, 'steps': 35124, 'loss/train': 2.265519380569458} +03/05/2022 06:45:19 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 06:45:22 - INFO - codeparrot_training - Step 35125: {'lr': 0.00044070330797755825, 'samples': 17984512, 'steps': 35125, 'loss/train': 1.659589171409607} +03/05/2022 06:45:25 - INFO - codeparrot_training - Step 35126: {'lr': 0.0004406998764935195, 'samples': 17985024, 'steps': 35126, 'loss/train': 2.4351227283477783} +03/05/2022 06:45:27 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/05/2022 06:45:30 - INFO - codeparrot_training - Step 35127: {'lr': 0.0004406964449235544, 'samples': 17985536, 'steps': 35127, 'loss/train': 1.5124415159225464} +03/05/2022 06:45:34 - INFO - codeparrot_training - Step 35128: {'lr': 0.00044069301326766434, 'samples': 17986048, 'steps': 35128, 'loss/train': 1.4938569068908691} +03/05/2022 06:45:36 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 06:45:39 - INFO - codeparrot_training - Step 35129: {'lr': 0.00044068958152585104, 'samples': 17986560, 'steps': 35129, 'loss/train': 1.3420312404632568} +03/05/2022 06:45:42 - INFO - codeparrot_training - Step 35130: {'lr': 0.00044068614969811586, 'samples': 17987072, 'steps': 35130, 'loss/train': 0.8191577792167664} +03/05/2022 06:45:45 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/05/2022 06:45:48 - INFO - codeparrot_training - Step 35131: {'lr': 0.0004406827177844605, 'samples': 17987584, 'steps': 35131, 'loss/train': 2.0152664184570312} +03/05/2022 06:45:51 - INFO - codeparrot_training - Step 35132: {'lr': 0.00044067928578488645, 'samples': 17988096, 'steps': 35132, 'loss/train': 1.3087942600250244} +03/05/2022 06:45:53 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 06:45:56 - INFO - codeparrot_training - Step 35133: {'lr': 0.0004406758536993952, 'samples': 17988608, 'steps': 35133, 'loss/train': 2.6158320903778076} +03/05/2022 06:45:59 - INFO - codeparrot_training - Step 35134: {'lr': 0.00044067242152798843, 'samples': 17989120, 'steps': 35134, 'loss/train': 1.7170389890670776} +03/05/2022 06:46:02 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/05/2022 06:46:04 - INFO - codeparrot_training - Step 35135: {'lr': 0.00044066898927066757, 'samples': 17989632, 'steps': 35135, 'loss/train': 0.961797297000885} +03/05/2022 06:46:08 - INFO - codeparrot_training - Step 35136: {'lr': 0.0004406655569274342, 'samples': 17990144, 'steps': 35136, 'loss/train': 2.1463520526885986} +03/05/2022 06:46:10 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 06:46:13 - INFO - codeparrot_training - Step 35137: {'lr': 0.0004406621244982899, 'samples': 17990656, 'steps': 35137, 'loss/train': 1.3117103576660156} +03/05/2022 06:46:16 - INFO - codeparrot_training - Step 35138: {'lr': 0.00044065869198323614, 'samples': 17991168, 'steps': 35138, 'loss/train': 1.9598625898361206} +03/05/2022 06:46:18 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 06:46:21 - INFO - codeparrot_training - Step 35139: {'lr': 0.0004406552593822746, 'samples': 17991680, 'steps': 35139, 'loss/train': 1.6232032775878906} +03/05/2022 06:46:24 - INFO - codeparrot_training - Step 35140: {'lr': 0.00044065182669540665, 'samples': 17992192, 'steps': 35140, 'loss/train': 1.843003749847412} +03/05/2022 06:46:27 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 06:46:30 - INFO - codeparrot_training - Step 35141: {'lr': 0.000440648393922634, 'samples': 17992704, 'steps': 35141, 'loss/train': 2.109143018722534} +03/05/2022 06:46:33 - INFO - codeparrot_training - Step 35142: {'lr': 0.0004406449610639581, 'samples': 17993216, 'steps': 35142, 'loss/train': 1.9507811069488525} +03/05/2022 06:46:36 - INFO - codeparrot_training - Step 35143: {'lr': 0.0004406415281193805, 'samples': 17993728, 'steps': 35143, 'loss/train': 1.6532790660858154} +03/05/2022 06:46:36 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/05/2022 06:46:42 - INFO - codeparrot_training - Step 35144: {'lr': 0.0004406380950889027, 'samples': 17994240, 'steps': 35144, 'loss/train': 1.553581953048706} +03/05/2022 06:46:45 - INFO - codeparrot_training - Step 35145: {'lr': 0.0004406346619725265, 'samples': 17994752, 'steps': 35145, 'loss/train': 1.8314599990844727} +03/05/2022 06:46:45 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 06:46:50 - INFO - codeparrot_training - Step 35146: {'lr': 0.00044063122877025315, 'samples': 17995264, 'steps': 35146, 'loss/train': 2.3224868774414062} +03/05/2022 06:46:53 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/05/2022 06:46:56 - INFO - codeparrot_training - Step 35147: {'lr': 0.0004406277954820843, 'samples': 17995776, 'steps': 35147, 'loss/train': 1.8385311365127563} +03/05/2022 06:46:59 - INFO - codeparrot_training - Step 35148: {'lr': 0.0004406243621080216, 'samples': 17996288, 'steps': 35148, 'loss/train': 2.3053135871887207} +03/05/2022 06:47:01 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 06:47:04 - INFO - codeparrot_training - Step 35149: {'lr': 0.00044062092864806634, 'samples': 17996800, 'steps': 35149, 'loss/train': 1.9078410863876343} +03/05/2022 06:47:07 - INFO - codeparrot_training - Step 35150: {'lr': 0.00044061749510222037, 'samples': 17997312, 'steps': 35150, 'loss/train': 1.633927822113037} +03/05/2022 06:47:10 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 06:47:12 - INFO - codeparrot_training - Step 35151: {'lr': 0.00044061406147048504, 'samples': 17997824, 'steps': 35151, 'loss/train': 2.0847232341766357} +03/05/2022 06:47:16 - INFO - codeparrot_training - Step 35152: {'lr': 0.000440610627752862, 'samples': 17998336, 'steps': 35152, 'loss/train': 2.969688653945923} +03/05/2022 06:47:18 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 06:47:21 - INFO - codeparrot_training - Step 35153: {'lr': 0.00044060719394935265, 'samples': 17998848, 'steps': 35153, 'loss/train': 2.382965087890625} +03/05/2022 06:47:24 - INFO - codeparrot_training - Step 35154: {'lr': 0.0004406037600599588, 'samples': 17999360, 'steps': 35154, 'loss/train': 2.2540781497955322} +03/05/2022 06:47:26 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 06:47:29 - INFO - codeparrot_training - Step 35155: {'lr': 0.0004406003260846817, 'samples': 17999872, 'steps': 35155, 'loss/train': 1.8190860748291016} +03/05/2022 06:47:32 - INFO - codeparrot_training - Step 35156: {'lr': 0.0004405968920235231, 'samples': 18000384, 'steps': 35156, 'loss/train': 1.9425158500671387} +03/05/2022 06:47:35 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/05/2022 06:47:38 - INFO - codeparrot_training - Step 35157: {'lr': 0.0004405934578764845, 'samples': 18000896, 'steps': 35157, 'loss/train': 2.1428630352020264} +03/05/2022 06:47:41 - INFO - codeparrot_training - Step 35158: {'lr': 0.0004405900236435674, 'samples': 18001408, 'steps': 35158, 'loss/train': 1.6020028591156006} +03/05/2022 06:47:43 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 06:47:46 - INFO - codeparrot_training - Step 35159: {'lr': 0.00044058658932477336, 'samples': 18001920, 'steps': 35159, 'loss/train': 1.2939059734344482} +03/05/2022 06:47:49 - INFO - codeparrot_training - Step 35160: {'lr': 0.0004405831549201039, 'samples': 18002432, 'steps': 35160, 'loss/train': 1.7397069931030273} +03/05/2022 06:47:51 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/05/2022 06:47:55 - INFO - codeparrot_training - Step 35161: {'lr': 0.0004405797204295607, 'samples': 18002944, 'steps': 35161, 'loss/train': 1.682003140449524} +03/05/2022 06:47:58 - INFO - codeparrot_training - Step 35162: {'lr': 0.0004405762858531451, 'samples': 18003456, 'steps': 35162, 'loss/train': 2.0181326866149902} +03/05/2022 06:48:00 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 06:48:03 - INFO - codeparrot_training - Step 35163: {'lr': 0.00044057285119085887, 'samples': 18003968, 'steps': 35163, 'loss/train': 2.097601890563965} +03/05/2022 06:48:06 - INFO - codeparrot_training - Step 35164: {'lr': 0.0004405694164427035, 'samples': 18004480, 'steps': 35164, 'loss/train': 0.9529314637184143} +03/05/2022 06:48:08 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/05/2022 06:48:11 - INFO - codeparrot_training - Step 35165: {'lr': 0.0004405659816086804, 'samples': 18004992, 'steps': 35165, 'loss/train': 1.4942588806152344} +03/05/2022 06:48:15 - INFO - codeparrot_training - Step 35166: {'lr': 0.00044056254668879127, 'samples': 18005504, 'steps': 35166, 'loss/train': 2.0905256271362305} +03/05/2022 06:48:17 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/05/2022 06:48:20 - INFO - codeparrot_training - Step 35167: {'lr': 0.00044055911168303753, 'samples': 18006016, 'steps': 35167, 'loss/train': 1.5813617706298828} +03/05/2022 06:48:23 - INFO - codeparrot_training - Step 35168: {'lr': 0.00044055567659142083, 'samples': 18006528, 'steps': 35168, 'loss/train': 2.3200302124023438} +03/05/2022 06:48:26 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 06:48:29 - INFO - codeparrot_training - Step 35169: {'lr': 0.0004405522414139427, 'samples': 18007040, 'steps': 35169, 'loss/train': 2.1776938438415527} +03/05/2022 06:48:32 - INFO - codeparrot_training - Step 35170: {'lr': 0.0004405488061506047, 'samples': 18007552, 'steps': 35170, 'loss/train': 1.9812142848968506} +03/05/2022 06:48:35 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/05/2022 06:48:37 - INFO - codeparrot_training - Step 35171: {'lr': 0.0004405453708014082, 'samples': 18008064, 'steps': 35171, 'loss/train': 2.974316120147705} +03/05/2022 06:48:40 - INFO - codeparrot_training - Step 35172: {'lr': 0.00044054193536635503, 'samples': 18008576, 'steps': 35172, 'loss/train': 0.9614003896713257} +03/05/2022 06:48:44 - INFO - codeparrot_training - Step 35173: {'lr': 0.00044053849984544653, 'samples': 18009088, 'steps': 35173, 'loss/train': 1.5257419347763062} +03/05/2022 06:48:44 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/05/2022 06:48:49 - INFO - codeparrot_training - Step 35174: {'lr': 0.0004405350642386844, 'samples': 18009600, 'steps': 35174, 'loss/train': 1.924976110458374} +03/05/2022 06:48:52 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/05/2022 06:48:55 - INFO - codeparrot_training - Step 35175: {'lr': 0.00044053162854607004, 'samples': 18010112, 'steps': 35175, 'loss/train': 2.0167171955108643} +03/05/2022 06:48:58 - INFO - codeparrot_training - Step 35176: {'lr': 0.0004405281927676051, 'samples': 18010624, 'steps': 35176, 'loss/train': 0.6707471609115601} +03/05/2022 06:49:01 - INFO - codeparrot_training - Step 35177: {'lr': 0.0004405247569032911, 'samples': 18011136, 'steps': 35177, 'loss/train': 0.6019644141197205} +03/05/2022 06:49:03 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/05/2022 06:49:06 - INFO - codeparrot_training - Step 35178: {'lr': 0.00044052132095312956, 'samples': 18011648, 'steps': 35178, 'loss/train': 1.0000718832015991} +03/05/2022 06:49:10 - INFO - codeparrot_training - Step 35179: {'lr': 0.0004405178849171221, 'samples': 18012160, 'steps': 35179, 'loss/train': 1.952765941619873} +03/05/2022 06:49:11 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 06:49:15 - INFO - codeparrot_training - Step 35180: {'lr': 0.00044051444879527013, 'samples': 18012672, 'steps': 35180, 'loss/train': 1.752365231513977} +03/05/2022 06:49:18 - INFO - codeparrot_training - Step 35181: {'lr': 0.00044051101258757544, 'samples': 18013184, 'steps': 35181, 'loss/train': 1.7537517547607422} +03/05/2022 06:49:20 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 06:49:23 - INFO - codeparrot_training - Step 35182: {'lr': 0.0004405075762940393, 'samples': 18013696, 'steps': 35182, 'loss/train': 1.0589922666549683} +03/05/2022 06:49:26 - INFO - codeparrot_training - Step 35183: {'lr': 0.00044050413991466344, 'samples': 18014208, 'steps': 35183, 'loss/train': 0.8382319808006287} +03/05/2022 06:49:28 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 06:49:32 - INFO - codeparrot_training - Step 35184: {'lr': 0.0004405007034494494, 'samples': 18014720, 'steps': 35184, 'loss/train': 2.4702956676483154} +03/05/2022 06:49:35 - INFO - codeparrot_training - Step 35185: {'lr': 0.00044049726689839854, 'samples': 18015232, 'steps': 35185, 'loss/train': 2.3381457328796387} +03/05/2022 06:49:36 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/05/2022 06:49:40 - INFO - codeparrot_training - Step 35186: {'lr': 0.0004404938302615126, 'samples': 18015744, 'steps': 35186, 'loss/train': 2.3969335556030273} +03/05/2022 06:49:43 - INFO - codeparrot_training - Step 35187: {'lr': 0.00044049039353879317, 'samples': 18016256, 'steps': 35187, 'loss/train': 1.9619604349136353} +03/05/2022 06:49:45 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/05/2022 06:49:48 - INFO - codeparrot_training - Step 35188: {'lr': 0.00044048695673024166, 'samples': 18016768, 'steps': 35188, 'loss/train': 2.0386698246002197} +03/05/2022 06:49:52 - INFO - codeparrot_training - Step 35189: {'lr': 0.00044048351983585966, 'samples': 18017280, 'steps': 35189, 'loss/train': 2.782304525375366} +03/05/2022 06:49:53 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/05/2022 06:49:57 - INFO - codeparrot_training - Step 35190: {'lr': 0.00044048008285564865, 'samples': 18017792, 'steps': 35190, 'loss/train': 1.0566166639328003} +03/05/2022 06:50:00 - INFO - codeparrot_training - Step 35191: {'lr': 0.0004404766457896104, 'samples': 18018304, 'steps': 35191, 'loss/train': 2.2477879524230957} +03/05/2022 06:50:01 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 06:50:06 - INFO - codeparrot_training - Step 35192: {'lr': 0.0004404732086377462, 'samples': 18018816, 'steps': 35192, 'loss/train': 2.0718581676483154} +03/05/2022 06:50:09 - INFO - codeparrot_training - Step 35193: {'lr': 0.00044046977140005774, 'samples': 18019328, 'steps': 35193, 'loss/train': 1.7784464359283447} +03/05/2022 06:50:10 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/05/2022 06:50:14 - INFO - codeparrot_training - Step 35194: {'lr': 0.00044046633407654657, 'samples': 18019840, 'steps': 35194, 'loss/train': 1.4552630186080933} +03/05/2022 06:50:17 - INFO - codeparrot_training - Step 35195: {'lr': 0.0004404628966672142, 'samples': 18020352, 'steps': 35195, 'loss/train': 1.3339378833770752} +03/05/2022 06:50:22 - INFO - codeparrot_training - Step 35196: {'lr': 0.0004404594591720622, 'samples': 18020864, 'steps': 35196, 'loss/train': 2.127044439315796} +03/05/2022 06:50:26 - INFO - codeparrot_training - Step 35197: {'lr': 0.00044045602159109207, 'samples': 18021376, 'steps': 35197, 'loss/train': 2.4092276096343994} +03/05/2022 06:50:26 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/05/2022 06:50:31 - INFO - codeparrot_training - Step 35198: {'lr': 0.0004404525839243054, 'samples': 18021888, 'steps': 35198, 'loss/train': 2.0275094509124756} +03/05/2022 06:50:34 - INFO - codeparrot_training - Step 35199: {'lr': 0.00044044914617170374, 'samples': 18022400, 'steps': 35199, 'loss/train': 1.4127453565597534} +03/05/2022 06:50:35 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/05/2022 06:50:39 - INFO - codeparrot_training - Step 35200: {'lr': 0.00044044570833328865, 'samples': 18022912, 'steps': 35200, 'loss/train': 1.365465760231018} +03/05/2022 06:50:43 - INFO - codeparrot_training - Step 35201: {'lr': 0.00044044227040906166, 'samples': 18023424, 'steps': 35201, 'loss/train': 1.4684242010116577} +03/05/2022 06:50:44 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/05/2022 06:50:48 - INFO - codeparrot_training - Step 35202: {'lr': 0.00044043883239902425, 'samples': 18023936, 'steps': 35202, 'loss/train': 2.4370481967926025} +03/05/2022 06:50:51 - INFO - codeparrot_training - Step 35203: {'lr': 0.00044043539430317814, 'samples': 18024448, 'steps': 35203, 'loss/train': 1.5070427656173706} +03/05/2022 06:50:52 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/05/2022 06:50:56 - INFO - codeparrot_training - Step 35204: {'lr': 0.00044043195612152475, 'samples': 18024960, 'steps': 35204, 'loss/train': 1.3770948648452759} +03/05/2022 06:50:59 - INFO - codeparrot_training - Step 35205: {'lr': 0.0004404285178540657, 'samples': 18025472, 'steps': 35205, 'loss/train': 1.5335878133773804} +03/05/2022 06:51:00 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/05/2022 06:51:05 - INFO - codeparrot_training - Step 35206: {'lr': 0.0004404250795008024, 'samples': 18025984, 'steps': 35206, 'loss/train': 1.5627869367599487} +03/05/2022 06:51:08 - INFO - codeparrot_training - Step 35207: {'lr': 0.00044042164106173655, 'samples': 18026496, 'steps': 35207, 'loss/train': 1.9812368154525757} +03/05/2022 06:51:09 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 06:51:13 - INFO - codeparrot_training - Step 35208: {'lr': 0.00044041820253686964, 'samples': 18027008, 'steps': 35208, 'loss/train': 1.741363763809204} +03/05/2022 06:51:16 - INFO - codeparrot_training - Step 35209: {'lr': 0.0004404147639262032, 'samples': 18027520, 'steps': 35209, 'loss/train': 1.1975996494293213} +03/05/2022 06:51:17 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 06:51:22 - INFO - codeparrot_training - Step 35210: {'lr': 0.00044041132522973885, 'samples': 18028032, 'steps': 35210, 'loss/train': 1.5243034362792969} +03/05/2022 06:51:25 - INFO - codeparrot_training - Step 35211: {'lr': 0.0004404078864474781, 'samples': 18028544, 'steps': 35211, 'loss/train': 1.6824908256530762} +03/05/2022 06:51:26 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/05/2022 06:51:30 - INFO - codeparrot_training - Step 35212: {'lr': 0.00044040444757942245, 'samples': 18029056, 'steps': 35212, 'loss/train': 1.688216209411621} +03/05/2022 06:51:33 - INFO - codeparrot_training - Step 35213: {'lr': 0.00044040100862557355, 'samples': 18029568, 'steps': 35213, 'loss/train': 1.4389698505401611} +03/05/2022 06:51:34 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 06:51:39 - INFO - codeparrot_training - Step 35214: {'lr': 0.00044039756958593287, 'samples': 18030080, 'steps': 35214, 'loss/train': 1.472037434577942} +03/05/2022 06:51:42 - INFO - codeparrot_training - Step 35215: {'lr': 0.000440394130460502, 'samples': 18030592, 'steps': 35215, 'loss/train': 2.4172937870025635} +03/05/2022 06:51:42 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/05/2022 06:51:47 - INFO - codeparrot_training - Step 35216: {'lr': 0.00044039069124928245, 'samples': 18031104, 'steps': 35216, 'loss/train': 1.5323917865753174} +03/05/2022 06:51:50 - INFO - codeparrot_training - Step 35217: {'lr': 0.0004403872519522758, 'samples': 18031616, 'steps': 35217, 'loss/train': 2.048283100128174} +03/05/2022 06:51:51 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/05/2022 06:51:55 - INFO - codeparrot_training - Step 35218: {'lr': 0.00044038381256948357, 'samples': 18032128, 'steps': 35218, 'loss/train': 1.6273083686828613} +03/05/2022 06:51:59 - INFO - codeparrot_training - Step 35219: {'lr': 0.00044038037310090736, 'samples': 18032640, 'steps': 35219, 'loss/train': 1.68900728225708} +03/05/2022 06:52:00 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 06:52:04 - INFO - codeparrot_training - Step 35220: {'lr': 0.00044037693354654863, 'samples': 18033152, 'steps': 35220, 'loss/train': 2.021061658859253} +03/05/2022 06:52:07 - INFO - codeparrot_training - Step 35221: {'lr': 0.0004403734939064091, 'samples': 18033664, 'steps': 35221, 'loss/train': 0.6436805725097656} +03/05/2022 06:52:08 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/05/2022 06:52:12 - INFO - codeparrot_training - Step 35222: {'lr': 0.00044037005418049016, 'samples': 18034176, 'steps': 35222, 'loss/train': 1.8494211435317993} +03/05/2022 06:52:16 - INFO - codeparrot_training - Step 35223: {'lr': 0.00044036661436879334, 'samples': 18034688, 'steps': 35223, 'loss/train': 2.0721592903137207} +03/05/2022 06:52:17 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 06:52:21 - INFO - codeparrot_training - Step 35224: {'lr': 0.00044036317447132035, 'samples': 18035200, 'steps': 35224, 'loss/train': 1.4143296480178833} +03/05/2022 06:52:24 - INFO - codeparrot_training - Step 35225: {'lr': 0.00044035973448807266, 'samples': 18035712, 'steps': 35225, 'loss/train': 0.7082532048225403} +03/05/2022 06:52:26 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/05/2022 06:52:30 - INFO - codeparrot_training - Step 35226: {'lr': 0.00044035629441905173, 'samples': 18036224, 'steps': 35226, 'loss/train': 1.7323354482650757} +03/05/2022 06:52:33 - INFO - codeparrot_training - Step 35227: {'lr': 0.0004403528542642592, 'samples': 18036736, 'steps': 35227, 'loss/train': 1.7430179119110107} +03/05/2022 06:52:34 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/05/2022 06:52:38 - INFO - codeparrot_training - Step 35228: {'lr': 0.00044034941402369666, 'samples': 18037248, 'steps': 35228, 'loss/train': 1.3191050291061401} +03/05/2022 06:52:41 - INFO - codeparrot_training - Step 35229: {'lr': 0.0004403459736973656, 'samples': 18037760, 'steps': 35229, 'loss/train': 0.10687405616044998} +03/05/2022 06:52:42 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/05/2022 06:52:46 - INFO - codeparrot_training - Step 35230: {'lr': 0.00044034253328526765, 'samples': 18038272, 'steps': 35230, 'loss/train': 1.659721851348877} +03/05/2022 06:52:50 - INFO - codeparrot_training - Step 35231: {'lr': 0.00044033909278740416, 'samples': 18038784, 'steps': 35231, 'loss/train': 1.411044955253601} +03/05/2022 06:52:51 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/05/2022 06:52:55 - INFO - codeparrot_training - Step 35232: {'lr': 0.0004403356522037769, 'samples': 18039296, 'steps': 35232, 'loss/train': 1.7962654829025269} +03/05/2022 06:52:58 - INFO - codeparrot_training - Step 35233: {'lr': 0.00044033221153438727, 'samples': 18039808, 'steps': 35233, 'loss/train': 2.097933530807495} +03/05/2022 06:52:59 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 06:53:03 - INFO - codeparrot_training - Step 35234: {'lr': 0.00044032877077923696, 'samples': 18040320, 'steps': 35234, 'loss/train': 1.517199993133545} +03/05/2022 06:53:07 - INFO - codeparrot_training - Step 35235: {'lr': 0.0004403253299383274, 'samples': 18040832, 'steps': 35235, 'loss/train': 1.7494125366210938} +03/05/2022 06:53:07 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/05/2022 06:53:12 - INFO - codeparrot_training - Step 35236: {'lr': 0.00044032188901166016, 'samples': 18041344, 'steps': 35236, 'loss/train': 1.896111011505127} +03/05/2022 06:53:15 - INFO - codeparrot_training - Step 35237: {'lr': 0.0004403184479992368, 'samples': 18041856, 'steps': 35237, 'loss/train': 1.6752750873565674} +03/05/2022 06:53:16 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/05/2022 06:53:20 - INFO - codeparrot_training - Step 35238: {'lr': 0.000440315006901059, 'samples': 18042368, 'steps': 35238, 'loss/train': 1.8756252527236938} +03/05/2022 06:53:23 - INFO - codeparrot_training - Step 35239: {'lr': 0.00044031156571712807, 'samples': 18042880, 'steps': 35239, 'loss/train': 0.886452853679657} +03/05/2022 06:53:24 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 06:53:29 - INFO - codeparrot_training - Step 35240: {'lr': 0.0004403081244474457, 'samples': 18043392, 'steps': 35240, 'loss/train': 1.243628740310669} +03/05/2022 06:53:32 - INFO - codeparrot_training - Step 35241: {'lr': 0.00044030468309201354, 'samples': 18043904, 'steps': 35241, 'loss/train': 1.7641898393630981} +03/05/2022 06:53:34 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 06:53:37 - INFO - codeparrot_training - Step 35242: {'lr': 0.0004403012416508329, 'samples': 18044416, 'steps': 35242, 'loss/train': 1.3685418367385864} +03/05/2022 06:53:41 - INFO - codeparrot_training - Step 35243: {'lr': 0.00044029780012390553, 'samples': 18044928, 'steps': 35243, 'loss/train': 1.3318215608596802} +03/05/2022 06:53:42 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/05/2022 06:53:46 - INFO - codeparrot_training - Step 35244: {'lr': 0.0004402943585112329, 'samples': 18045440, 'steps': 35244, 'loss/train': 1.5533034801483154} +03/05/2022 06:53:49 - INFO - codeparrot_training - Step 35245: {'lr': 0.0004402909168128165, 'samples': 18045952, 'steps': 35245, 'loss/train': 0.696471095085144} +03/05/2022 06:53:51 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/05/2022 06:53:54 - INFO - codeparrot_training - Step 35246: {'lr': 0.00044028747502865794, 'samples': 18046464, 'steps': 35246, 'loss/train': 2.0247786045074463} +03/05/2022 06:53:58 - INFO - codeparrot_training - Step 35247: {'lr': 0.0004402840331587589, 'samples': 18046976, 'steps': 35247, 'loss/train': 1.5530458688735962} +03/05/2022 06:53:59 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/05/2022 06:54:03 - INFO - codeparrot_training - Step 35248: {'lr': 0.0004402805912031207, 'samples': 18047488, 'steps': 35248, 'loss/train': 2.408391237258911} +03/05/2022 06:54:06 - INFO - codeparrot_training - Step 35249: {'lr': 0.0004402771491617451, 'samples': 18048000, 'steps': 35249, 'loss/train': 1.8934053182601929} +03/05/2022 06:54:07 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 06:54:11 - INFO - codeparrot_training - Step 35250: {'lr': 0.0004402737070346335, 'samples': 18048512, 'steps': 35250, 'loss/train': 1.3560611009597778} +03/05/2022 06:54:14 - INFO - codeparrot_training - Step 35251: {'lr': 0.0004402702648217875, 'samples': 18049024, 'steps': 35251, 'loss/train': 1.8378825187683105} +03/05/2022 06:54:16 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/05/2022 06:54:20 - INFO - codeparrot_training - Step 35252: {'lr': 0.00044026682252320864, 'samples': 18049536, 'steps': 35252, 'loss/train': 2.0805418491363525} +03/05/2022 06:54:23 - INFO - codeparrot_training - Step 35253: {'lr': 0.00044026338013889853, 'samples': 18050048, 'steps': 35253, 'loss/train': 1.6716229915618896} +03/05/2022 06:54:27 - INFO - codeparrot_training - Step 35254: {'lr': 0.00044025993766885866, 'samples': 18050560, 'steps': 35254, 'loss/train': 1.2776844501495361} +03/05/2022 06:54:27 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/05/2022 06:54:32 - INFO - codeparrot_training - Step 35255: {'lr': 0.00044025649511309064, 'samples': 18051072, 'steps': 35255, 'loss/train': 1.305182695388794} +03/05/2022 06:54:35 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 06:54:37 - INFO - codeparrot_training - Step 35256: {'lr': 0.00044025305247159585, 'samples': 18051584, 'steps': 35256, 'loss/train': 1.637816071510315} +03/05/2022 06:54:41 - INFO - codeparrot_training - Step 35257: {'lr': 0.00044024960974437606, 'samples': 18052096, 'steps': 35257, 'loss/train': 2.3576536178588867} +03/05/2022 06:54:44 - INFO - codeparrot_training - Step 35258: {'lr': 0.0004402461669314327, 'samples': 18052608, 'steps': 35258, 'loss/train': 0.06298601627349854} +03/05/2022 06:54:44 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 06:54:49 - INFO - codeparrot_training - Step 35259: {'lr': 0.0004402427240327674, 'samples': 18053120, 'steps': 35259, 'loss/train': 2.0097126960754395} +03/05/2022 06:54:52 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/05/2022 06:54:54 - INFO - codeparrot_training - Step 35260: {'lr': 0.0004402392810483816, 'samples': 18053632, 'steps': 35260, 'loss/train': 1.7525627613067627} +03/05/2022 06:54:57 - INFO - codeparrot_training - Step 35261: {'lr': 0.000440235837978277, 'samples': 18054144, 'steps': 35261, 'loss/train': 2.146052837371826} +03/05/2022 06:54:59 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/05/2022 06:55:03 - INFO - codeparrot_training - Step 35262: {'lr': 0.00044023239482245504, 'samples': 18054656, 'steps': 35262, 'loss/train': 1.607692003250122} +03/05/2022 06:55:06 - INFO - codeparrot_training - Step 35263: {'lr': 0.0004402289515809172, 'samples': 18055168, 'steps': 35263, 'loss/train': 2.072352647781372} +03/05/2022 06:55:07 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 06:55:11 - INFO - codeparrot_training - Step 35264: {'lr': 0.00044022550825366526, 'samples': 18055680, 'steps': 35264, 'loss/train': 2.176309585571289} +03/05/2022 06:55:14 - INFO - codeparrot_training - Step 35265: {'lr': 0.0004402220648407006, 'samples': 18056192, 'steps': 35265, 'loss/train': 1.2910398244857788} +03/05/2022 06:55:16 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/05/2022 06:55:20 - INFO - codeparrot_training - Step 35266: {'lr': 0.00044021862134202485, 'samples': 18056704, 'steps': 35266, 'loss/train': 1.8105504512786865} +03/05/2022 06:55:23 - INFO - codeparrot_training - Step 35267: {'lr': 0.00044021517775763943, 'samples': 18057216, 'steps': 35267, 'loss/train': 2.2113943099975586} +03/05/2022 06:55:26 - INFO - codeparrot_training - Step 35268: {'lr': 0.00044021173408754604, 'samples': 18057728, 'steps': 35268, 'loss/train': 1.9094796180725098} +03/05/2022 06:55:27 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/05/2022 06:55:31 - INFO - codeparrot_training - Step 35269: {'lr': 0.00044020829033174615, 'samples': 18058240, 'steps': 35269, 'loss/train': 1.143203616142273} +03/05/2022 06:55:35 - INFO - codeparrot_training - Step 35270: {'lr': 0.0004402048464902414, 'samples': 18058752, 'steps': 35270, 'loss/train': 2.0604827404022217} +03/05/2022 06:55:35 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 06:55:40 - INFO - codeparrot_training - Step 35271: {'lr': 0.0004402014025630332, 'samples': 18059264, 'steps': 35271, 'loss/train': 1.8925144672393799} +03/05/2022 06:55:43 - INFO - codeparrot_training - Step 35272: {'lr': 0.00044019795855012325, 'samples': 18059776, 'steps': 35272, 'loss/train': 1.9194886684417725} +03/05/2022 06:55:43 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/05/2022 06:55:48 - INFO - codeparrot_training - Step 35273: {'lr': 0.00044019451445151305, 'samples': 18060288, 'steps': 35273, 'loss/train': 1.029765009880066} +03/05/2022 06:55:51 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/05/2022 06:55:54 - INFO - codeparrot_training - Step 35274: {'lr': 0.00044019107026720404, 'samples': 18060800, 'steps': 35274, 'loss/train': 2.251084089279175} +03/05/2022 06:55:57 - INFO - codeparrot_training - Step 35275: {'lr': 0.00044018762599719796, 'samples': 18061312, 'steps': 35275, 'loss/train': 3.465636730194092} +03/05/2022 06:56:00 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 06:56:02 - INFO - codeparrot_training - Step 35276: {'lr': 0.0004401841816414962, 'samples': 18061824, 'steps': 35276, 'loss/train': 1.4118013381958008} +03/05/2022 06:56:05 - INFO - codeparrot_training - Step 35277: {'lr': 0.0004401807372001004, 'samples': 18062336, 'steps': 35277, 'loss/train': 1.5762361288070679} +03/05/2022 06:56:08 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 06:56:11 - INFO - codeparrot_training - Step 35278: {'lr': 0.0004401772926730122, 'samples': 18062848, 'steps': 35278, 'loss/train': 1.6931037902832031} +03/05/2022 06:56:14 - INFO - codeparrot_training - Step 35279: {'lr': 0.0004401738480602329, 'samples': 18063360, 'steps': 35279, 'loss/train': 2.070978879928589} +03/05/2022 06:56:16 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 06:56:19 - INFO - codeparrot_training - Step 35280: {'lr': 0.0004401704033617643, 'samples': 18063872, 'steps': 35280, 'loss/train': 2.350919485092163} +03/05/2022 06:56:22 - INFO - codeparrot_training - Step 35281: {'lr': 0.0004401669585776078, 'samples': 18064384, 'steps': 35281, 'loss/train': 1.8100520372390747} +03/05/2022 06:56:25 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 06:56:27 - INFO - codeparrot_training - Step 35282: {'lr': 0.000440163513707765, 'samples': 18064896, 'steps': 35282, 'loss/train': 1.4309860467910767} +03/05/2022 06:56:31 - INFO - codeparrot_training - Step 35283: {'lr': 0.00044016006875223745, 'samples': 18065408, 'steps': 35283, 'loss/train': 1.3934221267700195} +03/05/2022 06:56:33 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 06:56:36 - INFO - codeparrot_training - Step 35284: {'lr': 0.00044015662371102676, 'samples': 18065920, 'steps': 35284, 'loss/train': 1.2196741104125977} +03/05/2022 06:56:39 - INFO - codeparrot_training - Step 35285: {'lr': 0.0004401531785841344, 'samples': 18066432, 'steps': 35285, 'loss/train': 0.1861148178577423} +03/05/2022 06:56:42 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 06:56:44 - INFO - codeparrot_training - Step 35286: {'lr': 0.00044014973337156197, 'samples': 18066944, 'steps': 35286, 'loss/train': 1.5490472316741943} +03/05/2022 06:56:48 - INFO - codeparrot_training - Step 35287: {'lr': 0.0004401462880733109, 'samples': 18067456, 'steps': 35287, 'loss/train': 0.8867214322090149} +03/05/2022 06:56:50 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/05/2022 06:56:53 - INFO - codeparrot_training - Step 35288: {'lr': 0.000440142842689383, 'samples': 18067968, 'steps': 35288, 'loss/train': 0.9408214092254639} +03/05/2022 06:56:56 - INFO - codeparrot_training - Step 35289: {'lr': 0.00044013939721977957, 'samples': 18068480, 'steps': 35289, 'loss/train': 1.046529769897461} +03/05/2022 06:56:58 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/05/2022 06:57:02 - INFO - codeparrot_training - Step 35290: {'lr': 0.0004401359516645023, 'samples': 18068992, 'steps': 35290, 'loss/train': 1.8166972398757935} +03/05/2022 06:57:05 - INFO - codeparrot_training - Step 35291: {'lr': 0.0004401325060235527, 'samples': 18069504, 'steps': 35291, 'loss/train': 2.0292959213256836} +03/05/2022 06:57:07 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/05/2022 06:57:10 - INFO - codeparrot_training - Step 35292: {'lr': 0.00044012906029693236, 'samples': 18070016, 'steps': 35292, 'loss/train': 2.0658133029937744} +03/05/2022 06:57:13 - INFO - codeparrot_training - Step 35293: {'lr': 0.0004401256144846427, 'samples': 18070528, 'steps': 35293, 'loss/train': 1.2539781332015991} +03/05/2022 06:57:15 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 06:57:18 - INFO - codeparrot_training - Step 35294: {'lr': 0.0004401221685866854, 'samples': 18071040, 'steps': 35294, 'loss/train': 1.4674147367477417} +03/05/2022 06:57:22 - INFO - codeparrot_training - Step 35295: {'lr': 0.00044011872260306205, 'samples': 18071552, 'steps': 35295, 'loss/train': 1.7095365524291992} +03/05/2022 06:57:23 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 06:57:27 - INFO - codeparrot_training - Step 35296: {'lr': 0.00044011527653377416, 'samples': 18072064, 'steps': 35296, 'loss/train': 2.2766706943511963} +03/05/2022 06:57:30 - INFO - codeparrot_training - Step 35297: {'lr': 0.0004401118303788232, 'samples': 18072576, 'steps': 35297, 'loss/train': 1.571519374847412} +03/05/2022 06:57:32 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/05/2022 06:57:36 - INFO - codeparrot_training - Step 35298: {'lr': 0.00044010838413821075, 'samples': 18073088, 'steps': 35298, 'loss/train': 1.028382420539856} +03/05/2022 06:57:39 - INFO - codeparrot_training - Step 35299: {'lr': 0.0004401049378119384, 'samples': 18073600, 'steps': 35299, 'loss/train': 1.6650125980377197} +03/05/2022 06:57:43 - INFO - codeparrot_training - Step 35300: {'lr': 0.0004401014914000078, 'samples': 18074112, 'steps': 35300, 'loss/train': 1.7972372770309448} +03/05/2022 06:57:44 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/05/2022 06:57:48 - INFO - codeparrot_training - Step 35301: {'lr': 0.00044009804490242026, 'samples': 18074624, 'steps': 35301, 'loss/train': 1.3866076469421387} +03/05/2022 06:57:51 - INFO - codeparrot_training - Step 35302: {'lr': 0.00044009459831917755, 'samples': 18075136, 'steps': 35302, 'loss/train': 1.3412504196166992} +03/05/2022 06:57:52 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/05/2022 06:57:56 - INFO - codeparrot_training - Step 35303: {'lr': 0.00044009115165028113, 'samples': 18075648, 'steps': 35303, 'loss/train': 1.9598032236099243} +03/05/2022 06:58:00 - INFO - codeparrot_training - Step 35304: {'lr': 0.0004400877048957326, 'samples': 18076160, 'steps': 35304, 'loss/train': 2.306246280670166} +03/05/2022 06:58:01 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/05/2022 06:58:05 - INFO - codeparrot_training - Step 35305: {'lr': 0.00044008425805553347, 'samples': 18076672, 'steps': 35305, 'loss/train': 2.0815351009368896} +03/05/2022 06:58:08 - INFO - codeparrot_training - Step 35306: {'lr': 0.00044008081112968537, 'samples': 18077184, 'steps': 35306, 'loss/train': 1.9238163232803345} +03/05/2022 06:58:09 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/05/2022 06:58:13 - INFO - codeparrot_training - Step 35307: {'lr': 0.0004400773641181897, 'samples': 18077696, 'steps': 35307, 'loss/train': 1.2600516080856323} +03/05/2022 06:58:16 - INFO - codeparrot_training - Step 35308: {'lr': 0.0004400739170210481, 'samples': 18078208, 'steps': 35308, 'loss/train': 1.9850765466690063} +03/05/2022 06:58:18 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/05/2022 06:58:22 - INFO - codeparrot_training - Step 35309: {'lr': 0.00044007046983826213, 'samples': 18078720, 'steps': 35309, 'loss/train': 1.2172423601150513} +03/05/2022 06:58:25 - INFO - codeparrot_training - Step 35310: {'lr': 0.0004400670225698333, 'samples': 18079232, 'steps': 35310, 'loss/train': 1.993540644645691} +03/05/2022 06:58:27 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/05/2022 06:58:30 - INFO - codeparrot_training - Step 35311: {'lr': 0.00044006357521576334, 'samples': 18079744, 'steps': 35311, 'loss/train': 1.8923696279525757} +03/05/2022 06:58:33 - INFO - codeparrot_training - Step 35312: {'lr': 0.0004400601277760536, 'samples': 18080256, 'steps': 35312, 'loss/train': 2.0124077796936035} +03/05/2022 06:58:35 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 06:58:39 - INFO - codeparrot_training - Step 35313: {'lr': 0.0004400566802507057, 'samples': 18080768, 'steps': 35313, 'loss/train': 1.5543030500411987} +03/05/2022 06:58:42 - INFO - codeparrot_training - Step 35314: {'lr': 0.0004400532326397211, 'samples': 18081280, 'steps': 35314, 'loss/train': 2.284696340560913} +03/05/2022 06:58:43 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 06:58:47 - INFO - codeparrot_training - Step 35315: {'lr': 0.00044004978494310154, 'samples': 18081792, 'steps': 35315, 'loss/train': 1.8311705589294434} +03/05/2022 06:58:50 - INFO - codeparrot_training - Step 35316: {'lr': 0.00044004633716084854, 'samples': 18082304, 'steps': 35316, 'loss/train': 1.121253490447998} +03/05/2022 06:58:52 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 06:58:56 - INFO - codeparrot_training - Step 35317: {'lr': 0.0004400428892929635, 'samples': 18082816, 'steps': 35317, 'loss/train': 0.7658836841583252} +03/05/2022 06:58:59 - INFO - codeparrot_training - Step 35318: {'lr': 0.00044003944133944804, 'samples': 18083328, 'steps': 35318, 'loss/train': 1.0751616954803467} +03/05/2022 06:59:01 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 06:59:04 - INFO - codeparrot_training - Step 35319: {'lr': 0.00044003599330030385, 'samples': 18083840, 'steps': 35319, 'loss/train': 1.7972129583358765} +03/05/2022 06:59:07 - INFO - codeparrot_training - Step 35320: {'lr': 0.00044003254517553225, 'samples': 18084352, 'steps': 35320, 'loss/train': 2.182401418685913} +03/05/2022 06:59:09 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/05/2022 06:59:13 - INFO - codeparrot_training - Step 35321: {'lr': 0.000440029096965135, 'samples': 18084864, 'steps': 35321, 'loss/train': 2.3707339763641357} +03/05/2022 06:59:16 - INFO - codeparrot_training - Step 35322: {'lr': 0.0004400256486691135, 'samples': 18085376, 'steps': 35322, 'loss/train': 0.9176000356674194} +03/05/2022 06:59:17 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 06:59:21 - INFO - codeparrot_training - Step 35323: {'lr': 0.0004400222002874695, 'samples': 18085888, 'steps': 35323, 'loss/train': 1.3983089923858643} +03/05/2022 06:59:24 - INFO - codeparrot_training - Step 35324: {'lr': 0.0004400187518202043, 'samples': 18086400, 'steps': 35324, 'loss/train': 2.0238094329833984} +03/05/2022 06:59:26 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 06:59:30 - INFO - codeparrot_training - Step 35325: {'lr': 0.00044001530326731966, 'samples': 18086912, 'steps': 35325, 'loss/train': 1.357438325881958} +03/05/2022 06:59:33 - INFO - codeparrot_training - Step 35326: {'lr': 0.00044001185462881707, 'samples': 18087424, 'steps': 35326, 'loss/train': 1.9831361770629883} +03/05/2022 06:59:38 - INFO - codeparrot_training - Step 35327: {'lr': 0.000440008405904698, 'samples': 18087936, 'steps': 35327, 'loss/train': 2.250537872314453} +03/05/2022 06:59:41 - INFO - codeparrot_training - Step 35328: {'lr': 0.0004400049570949641, 'samples': 18088448, 'steps': 35328, 'loss/train': 2.08791184425354} +03/05/2022 06:59:43 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/05/2022 06:59:47 - INFO - codeparrot_training - Step 35329: {'lr': 0.0004400015081996169, 'samples': 18088960, 'steps': 35329, 'loss/train': 0.4197874069213867} +03/05/2022 06:59:50 - INFO - codeparrot_training - Step 35330: {'lr': 0.000439998059218658, 'samples': 18089472, 'steps': 35330, 'loss/train': 0.9868565797805786} +03/05/2022 06:59:51 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/05/2022 06:59:55 - INFO - codeparrot_training - Step 35331: {'lr': 0.0004399946101520889, 'samples': 18089984, 'steps': 35331, 'loss/train': 1.6940288543701172} +03/05/2022 06:59:58 - INFO - codeparrot_training - Step 35332: {'lr': 0.0004399911609999111, 'samples': 18090496, 'steps': 35332, 'loss/train': 0.921269416809082} +03/05/2022 07:00:00 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 07:00:03 - INFO - codeparrot_training - Step 35333: {'lr': 0.0004399877117621262, 'samples': 18091008, 'steps': 35333, 'loss/train': 1.8890599012374878} +03/05/2022 07:00:07 - INFO - codeparrot_training - Step 35334: {'lr': 0.0004399842624387358, 'samples': 18091520, 'steps': 35334, 'loss/train': 1.8547402620315552} +03/05/2022 07:00:08 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/05/2022 07:00:12 - INFO - codeparrot_training - Step 35335: {'lr': 0.0004399808130297415, 'samples': 18092032, 'steps': 35335, 'loss/train': 1.7220582962036133} +03/05/2022 07:00:15 - INFO - codeparrot_training - Step 35336: {'lr': 0.0004399773635351446, 'samples': 18092544, 'steps': 35336, 'loss/train': 1.0082073211669922} +03/05/2022 07:00:16 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/05/2022 07:00:20 - INFO - codeparrot_training - Step 35337: {'lr': 0.000439973913954947, 'samples': 18093056, 'steps': 35337, 'loss/train': 1.9139010906219482} +03/05/2022 07:00:24 - INFO - codeparrot_training - Step 35338: {'lr': 0.00043997046428915, 'samples': 18093568, 'steps': 35338, 'loss/train': 1.4447370767593384} +03/05/2022 07:00:25 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 07:00:29 - INFO - codeparrot_training - Step 35339: {'lr': 0.00043996701453775526, 'samples': 18094080, 'steps': 35339, 'loss/train': 1.6520748138427734} +03/05/2022 07:00:32 - INFO - codeparrot_training - Step 35340: {'lr': 0.0004399635647007643, 'samples': 18094592, 'steps': 35340, 'loss/train': 1.9704729318618774} +03/05/2022 07:00:33 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/05/2022 07:00:37 - INFO - codeparrot_training - Step 35341: {'lr': 0.00043996011477817875, 'samples': 18095104, 'steps': 35341, 'loss/train': 0.7786961793899536} +03/05/2022 07:00:41 - INFO - codeparrot_training - Step 35342: {'lr': 0.0004399566647700001, 'samples': 18095616, 'steps': 35342, 'loss/train': 1.8221663236618042} +03/05/2022 07:00:42 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/05/2022 07:00:46 - INFO - codeparrot_training - Step 35343: {'lr': 0.00043995321467622984, 'samples': 18096128, 'steps': 35343, 'loss/train': 2.1878201961517334} +03/05/2022 07:00:49 - INFO - codeparrot_training - Step 35344: {'lr': 0.00043994976449686964, 'samples': 18096640, 'steps': 35344, 'loss/train': 1.6983578205108643} +03/05/2022 07:00:50 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/05/2022 07:00:54 - INFO - codeparrot_training - Step 35345: {'lr': 0.000439946314231921, 'samples': 18097152, 'steps': 35345, 'loss/train': 2.1267080307006836} +03/05/2022 07:00:58 - INFO - codeparrot_training - Step 35346: {'lr': 0.00043994286388138545, 'samples': 18097664, 'steps': 35346, 'loss/train': 1.5404174327850342} +03/05/2022 07:00:58 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/05/2022 07:01:03 - INFO - codeparrot_training - Step 35347: {'lr': 0.00043993941344526455, 'samples': 18098176, 'steps': 35347, 'loss/train': 1.4122116565704346} +03/05/2022 07:01:06 - INFO - codeparrot_training - Step 35348: {'lr': 0.00043993596292356, 'samples': 18098688, 'steps': 35348, 'loss/train': 1.5588442087173462} +03/05/2022 07:01:07 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/05/2022 07:01:12 - INFO - codeparrot_training - Step 35349: {'lr': 0.00043993251231627315, 'samples': 18099200, 'steps': 35349, 'loss/train': 0.6924774646759033} +03/05/2022 07:01:15 - INFO - codeparrot_training - Step 35350: {'lr': 0.00043992906162340563, 'samples': 18099712, 'steps': 35350, 'loss/train': 1.5470616817474365} +03/05/2022 07:01:16 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/05/2022 07:01:20 - INFO - codeparrot_training - Step 35351: {'lr': 0.00043992561084495906, 'samples': 18100224, 'steps': 35351, 'loss/train': 1.2256709337234497} +03/05/2022 07:01:23 - INFO - codeparrot_training - Step 35352: {'lr': 0.0004399221599809349, 'samples': 18100736, 'steps': 35352, 'loss/train': 1.5401110649108887} +03/05/2022 07:01:24 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/05/2022 07:01:28 - INFO - codeparrot_training - Step 35353: {'lr': 0.0004399187090313348, 'samples': 18101248, 'steps': 35353, 'loss/train': 1.2445257902145386} +03/05/2022 07:01:31 - INFO - codeparrot_training - Step 35354: {'lr': 0.00043991525799616017, 'samples': 18101760, 'steps': 35354, 'loss/train': 1.0015606880187988} +03/05/2022 07:01:33 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/05/2022 07:01:37 - INFO - codeparrot_training - Step 35355: {'lr': 0.0004399118068754127, 'samples': 18102272, 'steps': 35355, 'loss/train': 1.7397176027297974} +03/05/2022 07:01:40 - INFO - codeparrot_training - Step 35356: {'lr': 0.0004399083556690939, 'samples': 18102784, 'steps': 35356, 'loss/train': 1.5548946857452393} +03/05/2022 07:01:42 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/05/2022 07:01:45 - INFO - codeparrot_training - Step 35357: {'lr': 0.0004399049043772053, 'samples': 18103296, 'steps': 35357, 'loss/train': 1.9874422550201416} +03/05/2022 07:01:48 - INFO - codeparrot_training - Step 35358: {'lr': 0.00043990145299974853, 'samples': 18103808, 'steps': 35358, 'loss/train': 1.2755451202392578} +03/05/2022 07:01:50 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 07:01:54 - INFO - codeparrot_training - Step 35359: {'lr': 0.0004398980015367251, 'samples': 18104320, 'steps': 35359, 'loss/train': 1.3763563632965088} +03/05/2022 07:01:57 - INFO - codeparrot_training - Step 35360: {'lr': 0.00043989454998813655, 'samples': 18104832, 'steps': 35360, 'loss/train': 1.8528356552124023} +03/05/2022 07:01:58 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/05/2022 07:02:03 - INFO - codeparrot_training - Step 35361: {'lr': 0.00043989109835398444, 'samples': 18105344, 'steps': 35361, 'loss/train': 1.833365797996521} +03/05/2022 07:02:06 - INFO - codeparrot_training - Step 35362: {'lr': 0.0004398876466342703, 'samples': 18105856, 'steps': 35362, 'loss/train': 2.0001299381256104} +03/05/2022 07:02:08 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/05/2022 07:02:11 - INFO - codeparrot_training - Step 35363: {'lr': 0.0004398841948289958, 'samples': 18106368, 'steps': 35363, 'loss/train': 1.8641129732131958} +03/05/2022 07:02:14 - INFO - codeparrot_training - Step 35364: {'lr': 0.0004398807429381623, 'samples': 18106880, 'steps': 35364, 'loss/train': 1.8249119520187378} +03/05/2022 07:02:17 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 07:02:20 - INFO - codeparrot_training - Step 35365: {'lr': 0.0004398772909617715, 'samples': 18107392, 'steps': 35365, 'loss/train': 2.0784218311309814} +03/05/2022 07:02:23 - INFO - codeparrot_training - Step 35366: {'lr': 0.00043987383889982495, 'samples': 18107904, 'steps': 35366, 'loss/train': 2.1477928161621094} +03/05/2022 07:02:28 - INFO - codeparrot_training - Step 35367: {'lr': 0.00043987038675232415, 'samples': 18108416, 'steps': 35367, 'loss/train': 2.054490327835083} +03/05/2022 07:02:31 - INFO - codeparrot_training - Step 35368: {'lr': 0.00043986693451927074, 'samples': 18108928, 'steps': 35368, 'loss/train': 1.482446312904358} +03/05/2022 07:02:33 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/05/2022 07:02:36 - INFO - codeparrot_training - Step 35369: {'lr': 0.0004398634822006662, 'samples': 18109440, 'steps': 35369, 'loss/train': 1.7927310466766357} +03/05/2022 07:02:39 - INFO - codeparrot_training - Step 35370: {'lr': 0.0004398600297965121, 'samples': 18109952, 'steps': 35370, 'loss/train': 1.8192603588104248} +03/05/2022 07:02:41 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/05/2022 07:02:45 - INFO - codeparrot_training - Step 35371: {'lr': 0.00043985657730680997, 'samples': 18110464, 'steps': 35371, 'loss/train': 1.9959499835968018} +03/05/2022 07:02:48 - INFO - codeparrot_training - Step 35372: {'lr': 0.00043985312473156143, 'samples': 18110976, 'steps': 35372, 'loss/train': 1.8132721185684204} +03/05/2022 07:02:50 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/05/2022 07:02:53 - INFO - codeparrot_training - Step 35373: {'lr': 0.000439849672070768, 'samples': 18111488, 'steps': 35373, 'loss/train': 1.3391412496566772} +03/05/2022 07:02:56 - INFO - codeparrot_training - Step 35374: {'lr': 0.00043984621932443115, 'samples': 18112000, 'steps': 35374, 'loss/train': 1.7567896842956543} +03/05/2022 07:02:58 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/05/2022 07:03:02 - INFO - codeparrot_training - Step 35375: {'lr': 0.0004398427664925526, 'samples': 18112512, 'steps': 35375, 'loss/train': 1.798269510269165} +03/05/2022 07:03:05 - INFO - codeparrot_training - Step 35376: {'lr': 0.0004398393135751338, 'samples': 18113024, 'steps': 35376, 'loss/train': 1.3682106733322144} +03/05/2022 07:03:07 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/05/2022 07:03:10 - INFO - codeparrot_training - Step 35377: {'lr': 0.0004398358605721764, 'samples': 18113536, 'steps': 35377, 'loss/train': 1.4039280414581299} +03/05/2022 07:03:14 - INFO - codeparrot_training - Step 35378: {'lr': 0.00043983240748368186, 'samples': 18114048, 'steps': 35378, 'loss/train': 1.766485571861267} +03/05/2022 07:03:16 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/05/2022 07:03:19 - INFO - codeparrot_training - Step 35379: {'lr': 0.0004398289543096518, 'samples': 18114560, 'steps': 35379, 'loss/train': 1.9724292755126953} +03/05/2022 07:03:22 - INFO - codeparrot_training - Step 35380: {'lr': 0.0004398255010500877, 'samples': 18115072, 'steps': 35380, 'loss/train': 1.424758791923523} +03/05/2022 07:03:24 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 07:03:27 - INFO - codeparrot_training - Step 35381: {'lr': 0.00043982204770499114, 'samples': 18115584, 'steps': 35381, 'loss/train': 1.5251657962799072} +03/05/2022 07:03:30 - INFO - codeparrot_training - Step 35382: {'lr': 0.0004398185942743637, 'samples': 18116096, 'steps': 35382, 'loss/train': 1.8977800607681274} +03/05/2022 07:03:32 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/05/2022 07:03:36 - INFO - codeparrot_training - Step 35383: {'lr': 0.00043981514075820693, 'samples': 18116608, 'steps': 35383, 'loss/train': 2.8138232231140137} +03/05/2022 07:03:39 - INFO - codeparrot_training - Step 35384: {'lr': 0.0004398116871565224, 'samples': 18117120, 'steps': 35384, 'loss/train': 2.2875161170959473} +03/05/2022 07:03:42 - INFO - codeparrot_training - Step 35385: {'lr': 0.0004398082334693116, 'samples': 18117632, 'steps': 35385, 'loss/train': 1.1356678009033203} +03/05/2022 07:03:43 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/05/2022 07:03:48 - INFO - codeparrot_training - Step 35386: {'lr': 0.0004398047796965762, 'samples': 18118144, 'steps': 35386, 'loss/train': 1.838055968284607} +03/05/2022 07:03:51 - INFO - codeparrot_training - Step 35387: {'lr': 0.0004398013258383177, 'samples': 18118656, 'steps': 35387, 'loss/train': 1.5081732273101807} +03/05/2022 07:03:51 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/05/2022 07:03:56 - INFO - codeparrot_training - Step 35388: {'lr': 0.0004397978718945377, 'samples': 18119168, 'steps': 35388, 'loss/train': 1.5435891151428223} +03/05/2022 07:03:59 - INFO - codeparrot_training - Step 35389: {'lr': 0.0004397944178652376, 'samples': 18119680, 'steps': 35389, 'loss/train': 1.090344786643982} +03/05/2022 07:04:00 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 07:04:04 - INFO - codeparrot_training - Step 35390: {'lr': 0.0004397909637504191, 'samples': 18120192, 'steps': 35390, 'loss/train': 1.2470654249191284} +03/05/2022 07:04:08 - INFO - codeparrot_training - Step 35391: {'lr': 0.00043978750955008374, 'samples': 18120704, 'steps': 35391, 'loss/train': 1.9085487127304077} +03/05/2022 07:04:08 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/05/2022 07:04:13 - INFO - codeparrot_training - Step 35392: {'lr': 0.00043978405526423305, 'samples': 18121216, 'steps': 35392, 'loss/train': 1.8536357879638672} +03/05/2022 07:04:16 - INFO - codeparrot_training - Step 35393: {'lr': 0.0004397806008928686, 'samples': 18121728, 'steps': 35393, 'loss/train': 2.1193811893463135} +03/05/2022 07:04:17 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/05/2022 07:04:21 - INFO - codeparrot_training - Step 35394: {'lr': 0.00043977714643599194, 'samples': 18122240, 'steps': 35394, 'loss/train': 2.59478759765625} +03/05/2022 07:04:25 - INFO - codeparrot_training - Step 35395: {'lr': 0.0004397736918936046, 'samples': 18122752, 'steps': 35395, 'loss/train': 2.2446210384368896} +03/05/2022 07:04:25 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 07:04:30 - INFO - codeparrot_training - Step 35396: {'lr': 0.0004397702372657082, 'samples': 18123264, 'steps': 35396, 'loss/train': 2.155799388885498} +03/05/2022 07:04:33 - INFO - codeparrot_training - Step 35397: {'lr': 0.00043976678255230417, 'samples': 18123776, 'steps': 35397, 'loss/train': 1.3293895721435547} +03/05/2022 07:04:33 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/05/2022 07:04:38 - INFO - codeparrot_training - Step 35398: {'lr': 0.0004397633277533942, 'samples': 18124288, 'steps': 35398, 'loss/train': 2.1473093032836914} +03/05/2022 07:04:41 - INFO - codeparrot_training - Step 35399: {'lr': 0.0004397598728689799, 'samples': 18124800, 'steps': 35399, 'loss/train': 0.9424858093261719} +03/05/2022 07:04:41 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/05/2022 07:04:47 - INFO - codeparrot_training - Step 35400: {'lr': 0.0004397564178990626, 'samples': 18125312, 'steps': 35400, 'loss/train': 1.1495715379714966} +03/05/2022 07:04:50 - INFO - codeparrot_training - Step 35401: {'lr': 0.0004397529628436441, 'samples': 18125824, 'steps': 35401, 'loss/train': 1.5135173797607422} +03/05/2022 07:04:50 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/05/2022 07:04:55 - INFO - codeparrot_training - Step 35402: {'lr': 0.0004397495077027258, 'samples': 18126336, 'steps': 35402, 'loss/train': 1.9945933818817139} +03/05/2022 07:04:58 - INFO - codeparrot_training - Step 35403: {'lr': 0.0004397460524763093, 'samples': 18126848, 'steps': 35403, 'loss/train': 1.6207804679870605} +03/05/2022 07:04:58 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/05/2022 07:05:04 - INFO - codeparrot_training - Step 35404: {'lr': 0.00043974259716439613, 'samples': 18127360, 'steps': 35404, 'loss/train': 1.6628955602645874} +03/05/2022 07:05:06 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/05/2022 07:05:09 - INFO - codeparrot_training - Step 35405: {'lr': 0.0004397391417669878, 'samples': 18127872, 'steps': 35405, 'loss/train': 0.11123427748680115} +03/05/2022 07:05:12 - INFO - codeparrot_training - Step 35406: {'lr': 0.0004397356862840861, 'samples': 18128384, 'steps': 35406, 'loss/train': 2.481821060180664} +03/05/2022 07:05:15 - INFO - codeparrot_training - Step 35407: {'lr': 0.00043973223071569234, 'samples': 18128896, 'steps': 35407, 'loss/train': 1.215630054473877} +03/05/2022 07:05:16 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/05/2022 07:05:21 - INFO - codeparrot_training - Step 35408: {'lr': 0.0004397287750618082, 'samples': 18129408, 'steps': 35408, 'loss/train': 1.9810402393341064} +03/05/2022 07:05:24 - INFO - codeparrot_training - Step 35409: {'lr': 0.00043972531932243516, 'samples': 18129920, 'steps': 35409, 'loss/train': 1.8107131719589233} +03/05/2022 07:05:24 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/05/2022 07:05:29 - INFO - codeparrot_training - Step 35410: {'lr': 0.00043972186349757484, 'samples': 18130432, 'steps': 35410, 'loss/train': 2.0783777236938477} +03/05/2022 07:05:32 - INFO - codeparrot_training - Step 35411: {'lr': 0.0004397184075872288, 'samples': 18130944, 'steps': 35411, 'loss/train': 1.9865747690200806} +03/05/2022 07:05:32 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/05/2022 07:05:38 - INFO - codeparrot_training - Step 35412: {'lr': 0.0004397149515913985, 'samples': 18131456, 'steps': 35412, 'loss/train': 1.385214924812317} +03/05/2022 07:05:41 - INFO - codeparrot_training - Step 35413: {'lr': 0.0004397114955100856, 'samples': 18131968, 'steps': 35413, 'loss/train': 1.933100700378418} +03/05/2022 07:05:41 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/05/2022 07:05:46 - INFO - codeparrot_training - Step 35414: {'lr': 0.00043970803934329167, 'samples': 18132480, 'steps': 35414, 'loss/train': 1.5497139692306519} +03/05/2022 07:05:49 - INFO - codeparrot_training - Step 35415: {'lr': 0.00043970458309101825, 'samples': 18132992, 'steps': 35415, 'loss/train': 1.764042615890503} +03/05/2022 07:05:50 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 07:05:55 - INFO - codeparrot_training - Step 35416: {'lr': 0.0004397011267532668, 'samples': 18133504, 'steps': 35416, 'loss/train': 1.5525791645050049} +03/05/2022 07:05:58 - INFO - codeparrot_training - Step 35417: {'lr': 0.00043969767033003894, 'samples': 18134016, 'steps': 35417, 'loss/train': 1.6743561029434204} +03/05/2022 07:05:58 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/05/2022 07:06:03 - INFO - codeparrot_training - Step 35418: {'lr': 0.0004396942138213363, 'samples': 18134528, 'steps': 35418, 'loss/train': 1.0302960872650146} +03/05/2022 07:06:06 - INFO - codeparrot_training - Step 35419: {'lr': 0.00043969075722716033, 'samples': 18135040, 'steps': 35419, 'loss/train': 0.9071533679962158} +03/05/2022 07:06:07 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/05/2022 07:06:12 - INFO - codeparrot_training - Step 35420: {'lr': 0.0004396873005475127, 'samples': 18135552, 'steps': 35420, 'loss/train': 1.0570669174194336} +03/05/2022 07:06:15 - INFO - codeparrot_training - Step 35421: {'lr': 0.00043968384378239477, 'samples': 18136064, 'steps': 35421, 'loss/train': 1.7101402282714844} +03/05/2022 07:06:15 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/05/2022 07:06:20 - INFO - codeparrot_training - Step 35422: {'lr': 0.00043968038693180834, 'samples': 18136576, 'steps': 35422, 'loss/train': 1.5428553819656372} +03/05/2022 07:06:23 - INFO - codeparrot_training - Step 35423: {'lr': 0.00043967692999575484, 'samples': 18137088, 'steps': 35423, 'loss/train': 1.3712791204452515} +03/05/2022 07:06:24 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/05/2022 07:06:28 - INFO - codeparrot_training - Step 35424: {'lr': 0.00043967347297423575, 'samples': 18137600, 'steps': 35424, 'loss/train': 2.416916608810425} +03/05/2022 07:06:32 - INFO - codeparrot_training - Step 35425: {'lr': 0.0004396700158672528, 'samples': 18138112, 'steps': 35425, 'loss/train': 1.770281434059143} +03/05/2022 07:06:32 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 07:06:37 - INFO - codeparrot_training - Step 35426: {'lr': 0.0004396665586748075, 'samples': 18138624, 'steps': 35426, 'loss/train': 1.5821340084075928} +03/05/2022 07:06:40 - INFO - codeparrot_training - Step 35427: {'lr': 0.0004396631013969013, 'samples': 18139136, 'steps': 35427, 'loss/train': 2.406684398651123} +03/05/2022 07:06:42 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/05/2022 07:06:46 - INFO - codeparrot_training - Step 35428: {'lr': 0.0004396596440335359, 'samples': 18139648, 'steps': 35428, 'loss/train': 1.2668864727020264} +03/05/2022 07:06:49 - INFO - codeparrot_training - Step 35429: {'lr': 0.00043965618658471276, 'samples': 18140160, 'steps': 35429, 'loss/train': 2.0522406101226807} +03/05/2022 07:06:51 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/05/2022 07:06:55 - INFO - codeparrot_training - Step 35430: {'lr': 0.0004396527290504334, 'samples': 18140672, 'steps': 35430, 'loss/train': 2.2182958126068115} +03/05/2022 07:06:58 - INFO - codeparrot_training - Step 35431: {'lr': 0.00043964927143069955, 'samples': 18141184, 'steps': 35431, 'loss/train': 1.9079910516738892} +03/05/2022 07:07:01 - INFO - codeparrot_training - Step 35432: {'lr': 0.0004396458137255126, 'samples': 18141696, 'steps': 35432, 'loss/train': 2.1030709743499756} +03/05/2022 07:07:02 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/05/2022 07:07:06 - INFO - codeparrot_training - Step 35433: {'lr': 0.0004396423559348742, 'samples': 18142208, 'steps': 35433, 'loss/train': 1.1751620769500732} +03/05/2022 07:07:09 - INFO - codeparrot_training - Step 35434: {'lr': 0.0004396388980587859, 'samples': 18142720, 'steps': 35434, 'loss/train': 6.445013999938965} +03/05/2022 07:07:11 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 07:07:15 - INFO - codeparrot_training - Step 35435: {'lr': 0.0004396354400972492, 'samples': 18143232, 'steps': 35435, 'loss/train': 2.357273817062378} +03/05/2022 07:07:18 - INFO - codeparrot_training - Step 35436: {'lr': 0.0004396319820502657, 'samples': 18143744, 'steps': 35436, 'loss/train': 1.5290404558181763} +03/05/2022 07:07:19 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 07:07:23 - INFO - codeparrot_training - Step 35437: {'lr': 0.000439628523917837, 'samples': 18144256, 'steps': 35437, 'loss/train': 1.7425615787506104} +03/05/2022 07:07:26 - INFO - codeparrot_training - Step 35438: {'lr': 0.0004396250656999646, 'samples': 18144768, 'steps': 35438, 'loss/train': 1.2589610815048218} +03/05/2022 07:07:28 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/05/2022 07:07:32 - INFO - codeparrot_training - Step 35439: {'lr': 0.00043962160739665, 'samples': 18145280, 'steps': 35439, 'loss/train': 1.6665329933166504} +03/05/2022 07:07:35 - INFO - codeparrot_training - Step 35440: {'lr': 0.0004396181490078949, 'samples': 18145792, 'steps': 35440, 'loss/train': 1.3968522548675537} +03/05/2022 07:07:36 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 07:07:40 - INFO - codeparrot_training - Step 35441: {'lr': 0.0004396146905337008, 'samples': 18146304, 'steps': 35441, 'loss/train': 2.1200029850006104} +03/05/2022 07:07:43 - INFO - codeparrot_training - Step 35442: {'lr': 0.0004396112319740692, 'samples': 18146816, 'steps': 35442, 'loss/train': 1.8281176090240479} +03/05/2022 07:07:44 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/05/2022 07:07:49 - INFO - codeparrot_training - Step 35443: {'lr': 0.0004396077733290017, 'samples': 18147328, 'steps': 35443, 'loss/train': 2.0901455879211426} +03/05/2022 07:07:52 - INFO - codeparrot_training - Step 35444: {'lr': 0.00043960431459849993, 'samples': 18147840, 'steps': 35444, 'loss/train': 2.52892804145813} +03/05/2022 07:07:52 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 07:07:57 - INFO - codeparrot_training - Step 35445: {'lr': 0.00043960085578256537, 'samples': 18148352, 'steps': 35445, 'loss/train': 1.5809903144836426} +03/05/2022 07:08:00 - INFO - codeparrot_training - Step 35446: {'lr': 0.0004395973968811995, 'samples': 18148864, 'steps': 35446, 'loss/train': 2.4038071632385254} +03/05/2022 07:08:01 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 07:08:05 - INFO - codeparrot_training - Step 35447: {'lr': 0.00043959393789440407, 'samples': 18149376, 'steps': 35447, 'loss/train': 0.4924311935901642} +03/05/2022 07:08:09 - INFO - codeparrot_training - Step 35448: {'lr': 0.0004395904788221805, 'samples': 18149888, 'steps': 35448, 'loss/train': 1.245524287223816} +03/05/2022 07:08:09 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/05/2022 07:08:14 - INFO - codeparrot_training - Step 35449: {'lr': 0.00043958701966453033, 'samples': 18150400, 'steps': 35449, 'loss/train': 2.1025407314300537} +03/05/2022 07:08:17 - INFO - codeparrot_training - Step 35450: {'lr': 0.00043958356042145524, 'samples': 18150912, 'steps': 35450, 'loss/train': 2.7735891342163086} +03/05/2022 07:08:18 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/05/2022 07:08:23 - INFO - codeparrot_training - Step 35451: {'lr': 0.0004395801010929567, 'samples': 18151424, 'steps': 35451, 'loss/train': 2.761763572692871} +03/05/2022 07:08:26 - INFO - codeparrot_training - Step 35452: {'lr': 0.0004395766416790363, 'samples': 18151936, 'steps': 35452, 'loss/train': 2.127596616744995} +03/05/2022 07:08:27 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 07:08:31 - INFO - codeparrot_training - Step 35453: {'lr': 0.0004395731821796956, 'samples': 18152448, 'steps': 35453, 'loss/train': 0.496438205242157} +03/05/2022 07:08:34 - INFO - codeparrot_training - Step 35454: {'lr': 0.00043956972259493615, 'samples': 18152960, 'steps': 35454, 'loss/train': 1.444140076637268} +03/05/2022 07:08:35 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 07:08:40 - INFO - codeparrot_training - Step 35455: {'lr': 0.0004395662629247595, 'samples': 18153472, 'steps': 35455, 'loss/train': 2.186835527420044} +03/05/2022 07:08:43 - INFO - codeparrot_training - Step 35456: {'lr': 0.0004395628031691672, 'samples': 18153984, 'steps': 35456, 'loss/train': 2.2724175453186035} +03/05/2022 07:08:44 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/05/2022 07:08:48 - INFO - codeparrot_training - Step 35457: {'lr': 0.00043955934332816083, 'samples': 18154496, 'steps': 35457, 'loss/train': 1.7552236318588257} +03/05/2022 07:08:51 - INFO - codeparrot_training - Step 35458: {'lr': 0.00043955588340174195, 'samples': 18155008, 'steps': 35458, 'loss/train': 1.7095812559127808} +03/05/2022 07:08:52 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/05/2022 07:08:56 - INFO - codeparrot_training - Step 35459: {'lr': 0.00043955242338991217, 'samples': 18155520, 'steps': 35459, 'loss/train': 1.1616950035095215} +03/05/2022 07:08:59 - INFO - codeparrot_training - Step 35460: {'lr': 0.0004395489632926729, 'samples': 18156032, 'steps': 35460, 'loss/train': 2.2740976810455322} +03/05/2022 07:09:01 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/05/2022 07:09:05 - INFO - codeparrot_training - Step 35461: {'lr': 0.0004395455031100258, 'samples': 18156544, 'steps': 35461, 'loss/train': 1.907161831855774} +03/05/2022 07:09:08 - INFO - codeparrot_training - Step 35462: {'lr': 0.0004395420428419725, 'samples': 18157056, 'steps': 35462, 'loss/train': 1.5770634412765503} +03/05/2022 07:09:09 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 07:09:13 - INFO - codeparrot_training - Step 35463: {'lr': 0.0004395385824885144, 'samples': 18157568, 'steps': 35463, 'loss/train': 1.1826404333114624} +03/05/2022 07:09:17 - INFO - codeparrot_training - Step 35464: {'lr': 0.0004395351220496532, 'samples': 18158080, 'steps': 35464, 'loss/train': 1.6916604042053223} +03/05/2022 07:09:18 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/05/2022 07:09:22 - INFO - codeparrot_training - Step 35465: {'lr': 0.00043953166152539035, 'samples': 18158592, 'steps': 35465, 'loss/train': 1.3167383670806885} +03/05/2022 07:09:25 - INFO - codeparrot_training - Step 35466: {'lr': 0.00043952820091572753, 'samples': 18159104, 'steps': 35466, 'loss/train': 1.7261786460876465} +03/05/2022 07:09:26 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 07:09:31 - INFO - codeparrot_training - Step 35467: {'lr': 0.0004395247402206662, 'samples': 18159616, 'steps': 35467, 'loss/train': 1.892163872718811} +03/05/2022 07:09:34 - INFO - codeparrot_training - Step 35468: {'lr': 0.0004395212794402079, 'samples': 18160128, 'steps': 35468, 'loss/train': 1.805734634399414} +03/05/2022 07:09:35 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 07:09:40 - INFO - codeparrot_training - Step 35469: {'lr': 0.00043951781857435424, 'samples': 18160640, 'steps': 35469, 'loss/train': 1.634075403213501} +03/05/2022 07:09:43 - INFO - codeparrot_training - Step 35470: {'lr': 0.00043951435762310686, 'samples': 18161152, 'steps': 35470, 'loss/train': 1.328317642211914} +03/05/2022 07:09:46 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/05/2022 07:09:48 - INFO - codeparrot_training - Step 35471: {'lr': 0.0004395108965864671, 'samples': 18161664, 'steps': 35471, 'loss/train': 1.683003306388855} +03/05/2022 07:09:51 - INFO - codeparrot_training - Step 35472: {'lr': 0.00043950743546443676, 'samples': 18162176, 'steps': 35472, 'loss/train': 2.213649272918701} +03/05/2022 07:09:54 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/05/2022 07:09:57 - INFO - codeparrot_training - Step 35473: {'lr': 0.0004395039742570173, 'samples': 18162688, 'steps': 35473, 'loss/train': 0.5966029167175293} +03/05/2022 07:10:00 - INFO - codeparrot_training - Step 35474: {'lr': 0.00043950051296421023, 'samples': 18163200, 'steps': 35474, 'loss/train': 2.132533311843872} +03/05/2022 07:10:02 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/05/2022 07:10:05 - INFO - codeparrot_training - Step 35475: {'lr': 0.00043949705158601715, 'samples': 18163712, 'steps': 35475, 'loss/train': 2.346548557281494} +03/05/2022 07:10:08 - INFO - codeparrot_training - Step 35476: {'lr': 0.00043949359012243963, 'samples': 18164224, 'steps': 35476, 'loss/train': 0.19669486582279205} +03/05/2022 07:10:11 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 07:10:14 - INFO - codeparrot_training - Step 35477: {'lr': 0.00043949012857347924, 'samples': 18164736, 'steps': 35477, 'loss/train': 2.09114670753479} +03/05/2022 07:10:17 - INFO - codeparrot_training - Step 35478: {'lr': 0.0004394866669391375, 'samples': 18165248, 'steps': 35478, 'loss/train': 0.9605236649513245} +03/05/2022 07:10:19 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 07:10:23 - INFO - codeparrot_training - Step 35479: {'lr': 0.00043948320521941596, 'samples': 18165760, 'steps': 35479, 'loss/train': 2.3185317516326904} +03/05/2022 07:10:26 - INFO - codeparrot_training - Step 35480: {'lr': 0.00043947974341431627, 'samples': 18166272, 'steps': 35480, 'loss/train': 1.4365769624710083} +03/05/2022 07:10:29 - INFO - codeparrot_training - Step 35481: {'lr': 0.0004394762815238399, 'samples': 18166784, 'steps': 35481, 'loss/train': 0.5044175982475281} +03/05/2022 07:10:29 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/05/2022 07:10:34 - INFO - codeparrot_training - Step 35482: {'lr': 0.00043947281954798844, 'samples': 18167296, 'steps': 35482, 'loss/train': 1.957090139389038} +03/05/2022 07:10:38 - INFO - codeparrot_training - Step 35483: {'lr': 0.0004394693574867635, 'samples': 18167808, 'steps': 35483, 'loss/train': 1.3616678714752197} +03/05/2022 07:10:38 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/05/2022 07:10:43 - INFO - codeparrot_training - Step 35484: {'lr': 0.0004394658953401666, 'samples': 18168320, 'steps': 35484, 'loss/train': 0.8078387975692749} +03/05/2022 07:10:46 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/05/2022 07:10:49 - INFO - codeparrot_training - Step 35485: {'lr': 0.0004394624331081992, 'samples': 18168832, 'steps': 35485, 'loss/train': 1.4853671789169312} +03/05/2022 07:10:52 - INFO - codeparrot_training - Step 35486: {'lr': 0.00043945897079086295, 'samples': 18169344, 'steps': 35486, 'loss/train': 1.071881890296936} +03/05/2022 07:10:55 - INFO - codeparrot_training - Step 35487: {'lr': 0.00043945550838815953, 'samples': 18169856, 'steps': 35487, 'loss/train': 1.9065582752227783} +03/05/2022 07:10:56 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 07:11:00 - INFO - codeparrot_training - Step 35488: {'lr': 0.00043945204590009027, 'samples': 18170368, 'steps': 35488, 'loss/train': 1.715984582901001} +03/05/2022 07:11:03 - INFO - codeparrot_training - Step 35489: {'lr': 0.0004394485833266569, 'samples': 18170880, 'steps': 35489, 'loss/train': 2.254807710647583} +03/05/2022 07:11:04 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/05/2022 07:11:09 - INFO - codeparrot_training - Step 35490: {'lr': 0.0004394451206678609, 'samples': 18171392, 'steps': 35490, 'loss/train': 1.1345527172088623} +03/05/2022 07:11:12 - INFO - codeparrot_training - Step 35491: {'lr': 0.00043944165792370385, 'samples': 18171904, 'steps': 35491, 'loss/train': 1.626334547996521} +03/05/2022 07:11:13 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 07:11:17 - INFO - codeparrot_training - Step 35492: {'lr': 0.00043943819509418723, 'samples': 18172416, 'steps': 35492, 'loss/train': 2.012094497680664} +03/05/2022 07:11:20 - INFO - codeparrot_training - Step 35493: {'lr': 0.00043943473217931283, 'samples': 18172928, 'steps': 35493, 'loss/train': 1.216780424118042} +03/05/2022 07:11:21 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/05/2022 07:11:26 - INFO - codeparrot_training - Step 35494: {'lr': 0.0004394312691790821, 'samples': 18173440, 'steps': 35494, 'loss/train': 0.43173399567604065} +03/05/2022 07:11:29 - INFO - codeparrot_training - Step 35495: {'lr': 0.00043942780609349636, 'samples': 18173952, 'steps': 35495, 'loss/train': 1.4299708604812622} +03/05/2022 07:11:29 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 07:11:34 - INFO - codeparrot_training - Step 35496: {'lr': 0.0004394243429225575, 'samples': 18174464, 'steps': 35496, 'loss/train': 6.322597026824951} +03/05/2022 07:11:37 - INFO - codeparrot_training - Step 35497: {'lr': 0.0004394208796662669, 'samples': 18174976, 'steps': 35497, 'loss/train': 1.6650282144546509} +03/05/2022 07:11:38 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/05/2022 07:11:43 - INFO - codeparrot_training - Step 35498: {'lr': 0.00043941741632462625, 'samples': 18175488, 'steps': 35498, 'loss/train': 1.5233014822006226} +03/05/2022 07:11:46 - INFO - codeparrot_training - Step 35499: {'lr': 0.000439413952897637, 'samples': 18176000, 'steps': 35499, 'loss/train': 0.5743542909622192} +03/05/2022 07:11:46 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/05/2022 07:11:51 - INFO - codeparrot_training - Step 35500: {'lr': 0.0004394104893853007, 'samples': 18176512, 'steps': 35500, 'loss/train': 1.6607469320297241} +03/05/2022 07:11:54 - INFO - codeparrot_training - Step 35501: {'lr': 0.00043940702578761906, 'samples': 18177024, 'steps': 35501, 'loss/train': 2.5239627361297607} +03/05/2022 07:11:55 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/05/2022 07:11:59 - INFO - codeparrot_training - Step 35502: {'lr': 0.00043940356210459344, 'samples': 18177536, 'steps': 35502, 'loss/train': 2.0733163356781006} +03/05/2022 07:12:03 - INFO - codeparrot_training - Step 35503: {'lr': 0.0004394000983362255, 'samples': 18178048, 'steps': 35503, 'loss/train': 1.381777048110962} +03/05/2022 07:12:03 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 07:12:08 - INFO - codeparrot_training - Step 35504: {'lr': 0.0004393966344825168, 'samples': 18178560, 'steps': 35504, 'loss/train': 2.083183765411377} +03/05/2022 07:12:11 - INFO - codeparrot_training - Step 35505: {'lr': 0.00043939317054346894, 'samples': 18179072, 'steps': 35505, 'loss/train': 1.6750946044921875} +03/05/2022 07:12:12 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 07:12:16 - INFO - codeparrot_training - Step 35506: {'lr': 0.00043938970651908346, 'samples': 18179584, 'steps': 35506, 'loss/train': 1.3924260139465332} +03/05/2022 07:12:19 - INFO - codeparrot_training - Step 35507: {'lr': 0.0004393862424093619, 'samples': 18180096, 'steps': 35507, 'loss/train': 1.8256967067718506} +03/05/2022 07:12:20 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/05/2022 07:12:25 - INFO - codeparrot_training - Step 35508: {'lr': 0.0004393827782143057, 'samples': 18180608, 'steps': 35508, 'loss/train': 1.2805529832839966} +03/05/2022 07:12:28 - INFO - codeparrot_training - Step 35509: {'lr': 0.00043937931393391667, 'samples': 18181120, 'steps': 35509, 'loss/train': 2.308640718460083} +03/05/2022 07:12:28 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/05/2022 07:12:33 - INFO - codeparrot_training - Step 35510: {'lr': 0.0004393758495681962, 'samples': 18181632, 'steps': 35510, 'loss/train': 1.4325696229934692} +03/05/2022 07:12:37 - INFO - codeparrot_training - Step 35511: {'lr': 0.0004393723851171459, 'samples': 18182144, 'steps': 35511, 'loss/train': 1.8442778587341309} +03/05/2022 07:12:37 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/05/2022 07:12:42 - INFO - codeparrot_training - Step 35512: {'lr': 0.0004393689205807673, 'samples': 18182656, 'steps': 35512, 'loss/train': 1.6317551136016846} +03/05/2022 07:12:45 - INFO - codeparrot_training - Step 35513: {'lr': 0.00043936545595906206, 'samples': 18183168, 'steps': 35513, 'loss/train': 2.220817804336548} +03/05/2022 07:12:45 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/05/2022 07:12:50 - INFO - codeparrot_training - Step 35514: {'lr': 0.00043936199125203156, 'samples': 18183680, 'steps': 35514, 'loss/train': 1.5724210739135742} +03/05/2022 07:12:53 - INFO - codeparrot_training - Step 35515: {'lr': 0.00043935852645967755, 'samples': 18184192, 'steps': 35515, 'loss/train': 0.8419063687324524} +03/05/2022 07:12:54 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 07:12:59 - INFO - codeparrot_training - Step 35516: {'lr': 0.00043935506158200143, 'samples': 18184704, 'steps': 35516, 'loss/train': 1.531604290008545} +03/05/2022 07:13:03 - INFO - codeparrot_training - Step 35517: {'lr': 0.000439351596619005, 'samples': 18185216, 'steps': 35517, 'loss/train': 2.169106960296631} +03/05/2022 07:13:06 - INFO - codeparrot_training - Step 35518: {'lr': 0.00043934813157068956, 'samples': 18185728, 'steps': 35518, 'loss/train': 1.196284532546997} +03/05/2022 07:13:06 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/05/2022 07:13:11 - INFO - codeparrot_training - Step 35519: {'lr': 0.00043934466643705673, 'samples': 18186240, 'steps': 35519, 'loss/train': 1.7818623781204224} +03/05/2022 07:13:15 - INFO - codeparrot_training - Step 35520: {'lr': 0.00043934120121810814, 'samples': 18186752, 'steps': 35520, 'loss/train': 1.0350135564804077} +03/05/2022 07:13:15 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/05/2022 07:13:20 - INFO - codeparrot_training - Step 35521: {'lr': 0.0004393377359138454, 'samples': 18187264, 'steps': 35521, 'loss/train': 1.635295033454895} +03/05/2022 07:13:23 - INFO - codeparrot_training - Step 35522: {'lr': 0.00043933427052426986, 'samples': 18187776, 'steps': 35522, 'loss/train': 1.3483483791351318} +03/05/2022 07:13:23 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 07:13:28 - INFO - codeparrot_training - Step 35523: {'lr': 0.00043933080504938337, 'samples': 18188288, 'steps': 35523, 'loss/train': 2.1573848724365234} +03/05/2022 07:13:32 - INFO - codeparrot_training - Step 35524: {'lr': 0.00043932733948918724, 'samples': 18188800, 'steps': 35524, 'loss/train': 0.502007007598877} +03/05/2022 07:13:32 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/05/2022 07:13:37 - INFO - codeparrot_training - Step 35525: {'lr': 0.0004393238738436832, 'samples': 18189312, 'steps': 35525, 'loss/train': 1.383028507232666} +03/05/2022 07:13:40 - INFO - codeparrot_training - Step 35526: {'lr': 0.00043932040811287264, 'samples': 18189824, 'steps': 35526, 'loss/train': 1.2182127237319946} +03/05/2022 07:13:41 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/05/2022 07:13:45 - INFO - codeparrot_training - Step 35527: {'lr': 0.0004393169422967573, 'samples': 18190336, 'steps': 35527, 'loss/train': 1.360651969909668} +03/05/2022 07:13:49 - INFO - codeparrot_training - Step 35528: {'lr': 0.0004393134763953387, 'samples': 18190848, 'steps': 35528, 'loss/train': 1.233083724975586} +03/05/2022 07:13:49 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/05/2022 07:13:54 - INFO - codeparrot_training - Step 35529: {'lr': 0.00043931001040861835, 'samples': 18191360, 'steps': 35529, 'loss/train': 2.583538770675659} +03/05/2022 07:13:57 - INFO - codeparrot_training - Step 35530: {'lr': 0.00043930654433659775, 'samples': 18191872, 'steps': 35530, 'loss/train': 1.9437367916107178} +03/05/2022 07:13:58 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/05/2022 07:14:03 - INFO - codeparrot_training - Step 35531: {'lr': 0.0004393030781792787, 'samples': 18192384, 'steps': 35531, 'loss/train': 0.9135006666183472} +03/05/2022 07:14:06 - INFO - codeparrot_training - Step 35532: {'lr': 0.00043929961193666246, 'samples': 18192896, 'steps': 35532, 'loss/train': 2.134550094604492} +03/05/2022 07:14:07 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/05/2022 07:14:11 - INFO - codeparrot_training - Step 35533: {'lr': 0.0004392961456087508, 'samples': 18193408, 'steps': 35533, 'loss/train': 1.5983855724334717} +03/05/2022 07:14:14 - INFO - codeparrot_training - Step 35534: {'lr': 0.00043929267919554516, 'samples': 18193920, 'steps': 35534, 'loss/train': 2.3285133838653564} +03/05/2022 07:14:16 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 07:14:20 - INFO - codeparrot_training - Step 35535: {'lr': 0.00043928921269704725, 'samples': 18194432, 'steps': 35535, 'loss/train': 1.2281553745269775} +03/05/2022 07:14:23 - INFO - codeparrot_training - Step 35536: {'lr': 0.00043928574611325845, 'samples': 18194944, 'steps': 35536, 'loss/train': 0.7840021848678589} +03/05/2022 07:14:25 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/05/2022 07:14:28 - INFO - codeparrot_training - Step 35537: {'lr': 0.00043928227944418046, 'samples': 18195456, 'steps': 35537, 'loss/train': 1.269748330116272} +03/05/2022 07:14:31 - INFO - codeparrot_training - Step 35538: {'lr': 0.00043927881268981484, 'samples': 18195968, 'steps': 35538, 'loss/train': 2.0974810123443604} +03/05/2022 07:14:33 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 07:14:37 - INFO - codeparrot_training - Step 35539: {'lr': 0.00043927534585016305, 'samples': 18196480, 'steps': 35539, 'loss/train': 1.9360780715942383} +03/05/2022 07:14:40 - INFO - codeparrot_training - Step 35540: {'lr': 0.0004392718789252267, 'samples': 18196992, 'steps': 35540, 'loss/train': 0.7840116620063782} +03/05/2022 07:14:41 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 07:14:45 - INFO - codeparrot_training - Step 35541: {'lr': 0.0004392684119150074, 'samples': 18197504, 'steps': 35541, 'loss/train': 0.9602648019790649} +03/05/2022 07:14:48 - INFO - codeparrot_training - Step 35542: {'lr': 0.0004392649448195066, 'samples': 18198016, 'steps': 35542, 'loss/train': 1.4598722457885742} +03/05/2022 07:14:49 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 07:14:54 - INFO - codeparrot_training - Step 35543: {'lr': 0.000439261477638726, 'samples': 18198528, 'steps': 35543, 'loss/train': 1.2526623010635376} +03/05/2022 07:14:57 - INFO - codeparrot_training - Step 35544: {'lr': 0.0004392580103726671, 'samples': 18199040, 'steps': 35544, 'loss/train': 1.5337557792663574} +03/05/2022 07:14:59 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 07:15:02 - INFO - codeparrot_training - Step 35545: {'lr': 0.0004392545430213315, 'samples': 18199552, 'steps': 35545, 'loss/train': 1.4768348932266235} +03/05/2022 07:15:06 - INFO - codeparrot_training - Step 35546: {'lr': 0.00043925107558472065, 'samples': 18200064, 'steps': 35546, 'loss/train': 1.4838359355926514} +03/05/2022 07:15:08 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/05/2022 07:15:11 - INFO - codeparrot_training - Step 35547: {'lr': 0.0004392476080628363, 'samples': 18200576, 'steps': 35547, 'loss/train': 0.7412009835243225} +03/05/2022 07:15:14 - INFO - codeparrot_training - Step 35548: {'lr': 0.00043924414045567973, 'samples': 18201088, 'steps': 35548, 'loss/train': 2.0830180644989014} +03/05/2022 07:15:16 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/05/2022 07:15:19 - INFO - codeparrot_training - Step 35549: {'lr': 0.00043924067276325274, 'samples': 18201600, 'steps': 35549, 'loss/train': 0.8914379477500916} +03/05/2022 07:15:22 - INFO - codeparrot_training - Step 35550: {'lr': 0.0004392372049855569, 'samples': 18202112, 'steps': 35550, 'loss/train': 1.7571898698806763} +03/05/2022 07:15:25 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/05/2022 07:15:28 - INFO - codeparrot_training - Step 35551: {'lr': 0.0004392337371225936, 'samples': 18202624, 'steps': 35551, 'loss/train': 1.8011308908462524} +03/05/2022 07:15:31 - INFO - codeparrot_training - Step 35552: {'lr': 0.0004392302691743645, 'samples': 18203136, 'steps': 35552, 'loss/train': 1.298619031906128} +03/05/2022 07:15:33 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 07:15:36 - INFO - codeparrot_training - Step 35553: {'lr': 0.0004392268011408712, 'samples': 18203648, 'steps': 35553, 'loss/train': 0.891318142414093} +03/05/2022 07:15:39 - INFO - codeparrot_training - Step 35554: {'lr': 0.0004392233330221152, 'samples': 18204160, 'steps': 35554, 'loss/train': 1.9722446203231812} +03/05/2022 07:15:42 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 07:15:45 - INFO - codeparrot_training - Step 35555: {'lr': 0.0004392198648180981, 'samples': 18204672, 'steps': 35555, 'loss/train': 1.735204815864563} +03/05/2022 07:15:48 - INFO - codeparrot_training - Step 35556: {'lr': 0.0004392163965288215, 'samples': 18205184, 'steps': 35556, 'loss/train': 1.578781247138977} +03/05/2022 07:15:50 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/05/2022 07:15:53 - INFO - codeparrot_training - Step 35557: {'lr': 0.0004392129281542868, 'samples': 18205696, 'steps': 35557, 'loss/train': 1.948217749595642} +03/05/2022 07:15:56 - INFO - codeparrot_training - Step 35558: {'lr': 0.00043920945969449577, 'samples': 18206208, 'steps': 35558, 'loss/train': 1.560465931892395} +03/05/2022 07:15:58 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/05/2022 07:16:02 - INFO - codeparrot_training - Step 35559: {'lr': 0.0004392059911494498, 'samples': 18206720, 'steps': 35559, 'loss/train': 1.9493038654327393} +03/05/2022 07:16:05 - INFO - codeparrot_training - Step 35560: {'lr': 0.0004392025225191506, 'samples': 18207232, 'steps': 35560, 'loss/train': 1.4980286359786987} +03/05/2022 07:16:07 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 07:16:10 - INFO - codeparrot_training - Step 35561: {'lr': 0.0004391990538035996, 'samples': 18207744, 'steps': 35561, 'loss/train': 1.2428779602050781} +03/05/2022 07:16:13 - INFO - codeparrot_training - Step 35562: {'lr': 0.00043919558500279845, 'samples': 18208256, 'steps': 35562, 'loss/train': 1.3592733144760132} +03/05/2022 07:16:15 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 07:16:18 - INFO - codeparrot_training - Step 35563: {'lr': 0.0004391921161167487, 'samples': 18208768, 'steps': 35563, 'loss/train': 1.3949799537658691} +03/05/2022 07:16:22 - INFO - codeparrot_training - Step 35564: {'lr': 0.00043918864714545194, 'samples': 18209280, 'steps': 35564, 'loss/train': 2.0902833938598633} +03/05/2022 07:16:27 - INFO - codeparrot_training - Step 35565: {'lr': 0.00043918517808890964, 'samples': 18209792, 'steps': 35565, 'loss/train': 1.7150702476501465} +03/05/2022 07:16:30 - INFO - codeparrot_training - Step 35566: {'lr': 0.0004391817089471234, 'samples': 18210304, 'steps': 35566, 'loss/train': 1.2185090780258179} +03/05/2022 07:16:32 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/05/2022 07:16:35 - INFO - codeparrot_training - Step 35567: {'lr': 0.0004391782397200949, 'samples': 18210816, 'steps': 35567, 'loss/train': 1.7002531290054321} +03/05/2022 07:16:38 - INFO - codeparrot_training - Step 35568: {'lr': 0.0004391747704078255, 'samples': 18211328, 'steps': 35568, 'loss/train': 1.7351813316345215} +03/05/2022 07:16:41 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/05/2022 07:16:44 - INFO - codeparrot_training - Step 35569: {'lr': 0.0004391713010103169, 'samples': 18211840, 'steps': 35569, 'loss/train': 2.4453492164611816} +03/05/2022 07:16:47 - INFO - codeparrot_training - Step 35570: {'lr': 0.0004391678315275706, 'samples': 18212352, 'steps': 35570, 'loss/train': 1.2943658828735352} +03/05/2022 07:16:49 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/05/2022 07:16:52 - INFO - codeparrot_training - Step 35571: {'lr': 0.00043916436195958825, 'samples': 18212864, 'steps': 35571, 'loss/train': 1.8873891830444336} +03/05/2022 07:16:55 - INFO - codeparrot_training - Step 35572: {'lr': 0.00043916089230637133, 'samples': 18213376, 'steps': 35572, 'loss/train': 1.5061194896697998} +03/05/2022 07:16:57 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 07:17:01 - INFO - codeparrot_training - Step 35573: {'lr': 0.0004391574225679215, 'samples': 18213888, 'steps': 35573, 'loss/train': 1.7620962858200073} +03/05/2022 07:17:04 - INFO - codeparrot_training - Step 35574: {'lr': 0.0004391539527442401, 'samples': 18214400, 'steps': 35574, 'loss/train': 1.66138756275177} +03/05/2022 07:17:06 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/05/2022 07:17:09 - INFO - codeparrot_training - Step 35575: {'lr': 0.000439150482835329, 'samples': 18214912, 'steps': 35575, 'loss/train': 2.205457925796509} +03/05/2022 07:17:12 - INFO - codeparrot_training - Step 35576: {'lr': 0.0004391470128411895, 'samples': 18215424, 'steps': 35576, 'loss/train': 2.679546594619751} +03/05/2022 07:17:14 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/05/2022 07:17:17 - INFO - codeparrot_training - Step 35577: {'lr': 0.00043914354276182335, 'samples': 18215936, 'steps': 35577, 'loss/train': 1.6146609783172607} +03/05/2022 07:17:21 - INFO - codeparrot_training - Step 35578: {'lr': 0.00043914007259723196, 'samples': 18216448, 'steps': 35578, 'loss/train': 1.694009780883789} +03/05/2022 07:17:22 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/05/2022 07:17:26 - INFO - codeparrot_training - Step 35579: {'lr': 0.000439136602347417, 'samples': 18216960, 'steps': 35579, 'loss/train': 2.3900341987609863} +03/05/2022 07:17:29 - INFO - codeparrot_training - Step 35580: {'lr': 0.00043913313201238017, 'samples': 18217472, 'steps': 35580, 'loss/train': 1.9951441287994385} +03/05/2022 07:17:34 - INFO - codeparrot_training - Step 35581: {'lr': 0.00043912966159212263, 'samples': 18217984, 'steps': 35581, 'loss/train': 1.2046443223953247} +03/05/2022 07:17:37 - INFO - codeparrot_training - Step 35582: {'lr': 0.0004391261910866463, 'samples': 18218496, 'steps': 35582, 'loss/train': 2.3222105503082275} +03/05/2022 07:17:39 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 07:17:43 - INFO - codeparrot_training - Step 35583: {'lr': 0.0004391227204959526, 'samples': 18219008, 'steps': 35583, 'loss/train': 2.3945767879486084} +03/05/2022 07:17:46 - INFO - codeparrot_training - Step 35584: {'lr': 0.00043911924982004315, 'samples': 18219520, 'steps': 35584, 'loss/train': 0.5633180141448975} +03/05/2022 07:17:48 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 07:17:51 - INFO - codeparrot_training - Step 35585: {'lr': 0.0004391157790589195, 'samples': 18220032, 'steps': 35585, 'loss/train': 0.3848421573638916} +03/05/2022 07:17:55 - INFO - codeparrot_training - Step 35586: {'lr': 0.00043911230821258313, 'samples': 18220544, 'steps': 35586, 'loss/train': 3.2703511714935303} +03/05/2022 07:17:57 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/05/2022 07:18:00 - INFO - codeparrot_training - Step 35587: {'lr': 0.00043910883728103575, 'samples': 18221056, 'steps': 35587, 'loss/train': 0.9034063816070557} +03/05/2022 07:18:03 - INFO - codeparrot_training - Step 35588: {'lr': 0.0004391053662642788, 'samples': 18221568, 'steps': 35588, 'loss/train': 1.2314529418945312} +03/05/2022 07:18:05 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 07:18:08 - INFO - codeparrot_training - Step 35589: {'lr': 0.00043910189516231386, 'samples': 18222080, 'steps': 35589, 'loss/train': 2.209214448928833} +03/05/2022 07:18:11 - INFO - codeparrot_training - Step 35590: {'lr': 0.00043909842397514255, 'samples': 18222592, 'steps': 35590, 'loss/train': 1.742717981338501} +03/05/2022 07:18:13 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/05/2022 07:18:17 - INFO - codeparrot_training - Step 35591: {'lr': 0.00043909495270276646, 'samples': 18223104, 'steps': 35591, 'loss/train': 0.9844104051589966} +03/05/2022 07:18:20 - INFO - codeparrot_training - Step 35592: {'lr': 0.00043909148134518703, 'samples': 18223616, 'steps': 35592, 'loss/train': 1.893539309501648} +03/05/2022 07:18:22 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 07:18:25 - INFO - codeparrot_training - Step 35593: {'lr': 0.0004390880099024059, 'samples': 18224128, 'steps': 35593, 'loss/train': 1.1582869291305542} +03/05/2022 07:18:29 - INFO - codeparrot_training - Step 35594: {'lr': 0.00043908453837442464, 'samples': 18224640, 'steps': 35594, 'loss/train': 1.4650366306304932} +03/05/2022 07:18:31 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 07:18:34 - INFO - codeparrot_training - Step 35595: {'lr': 0.0004390810667612448, 'samples': 18225152, 'steps': 35595, 'loss/train': 1.613234281539917} +03/05/2022 07:18:37 - INFO - codeparrot_training - Step 35596: {'lr': 0.00043907759506286797, 'samples': 18225664, 'steps': 35596, 'loss/train': 2.4216630458831787} +03/05/2022 07:18:39 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/05/2022 07:18:42 - INFO - codeparrot_training - Step 35597: {'lr': 0.00043907412327929575, 'samples': 18226176, 'steps': 35597, 'loss/train': 1.9293183088302612} +03/05/2022 07:18:45 - INFO - codeparrot_training - Step 35598: {'lr': 0.00043907065141052953, 'samples': 18226688, 'steps': 35598, 'loss/train': 1.966095209121704} +03/05/2022 07:18:47 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 07:18:51 - INFO - codeparrot_training - Step 35599: {'lr': 0.00043906717945657104, 'samples': 18227200, 'steps': 35599, 'loss/train': 1.903415560722351} +03/05/2022 07:18:54 - INFO - codeparrot_training - Step 35600: {'lr': 0.00043906370741742185, 'samples': 18227712, 'steps': 35600, 'loss/train': 1.459360122680664} +03/05/2022 07:18:56 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 07:18:59 - INFO - codeparrot_training - Step 35601: {'lr': 0.0004390602352930834, 'samples': 18228224, 'steps': 35601, 'loss/train': 1.2528719902038574} +03/05/2022 07:19:03 - INFO - codeparrot_training - Step 35602: {'lr': 0.00043905676308355734, 'samples': 18228736, 'steps': 35602, 'loss/train': 3.0356974601745605} +03/05/2022 07:19:05 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 07:19:08 - INFO - codeparrot_training - Step 35603: {'lr': 0.00043905329078884527, 'samples': 18229248, 'steps': 35603, 'loss/train': 0.8444804549217224} +03/05/2022 07:19:11 - INFO - codeparrot_training - Step 35604: {'lr': 0.00043904981840894863, 'samples': 18229760, 'steps': 35604, 'loss/train': 2.0784029960632324} +03/05/2022 07:19:14 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/05/2022 07:19:16 - INFO - codeparrot_training - Step 35605: {'lr': 0.0004390463459438691, 'samples': 18230272, 'steps': 35605, 'loss/train': 1.8745253086090088} +03/05/2022 07:19:19 - INFO - codeparrot_training - Step 35606: {'lr': 0.0004390428733936082, 'samples': 18230784, 'steps': 35606, 'loss/train': 1.8388772010803223} +03/05/2022 07:19:25 - INFO - codeparrot_training - Step 35607: {'lr': 0.0004390394007581675, 'samples': 18231296, 'steps': 35607, 'loss/train': 1.691643476486206} +03/05/2022 07:19:28 - INFO - codeparrot_training - Step 35608: {'lr': 0.00043903592803754856, 'samples': 18231808, 'steps': 35608, 'loss/train': 1.3373836278915405} +03/05/2022 07:19:31 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 07:19:33 - INFO - codeparrot_training - Step 35609: {'lr': 0.00043903245523175296, 'samples': 18232320, 'steps': 35609, 'loss/train': 2.0462119579315186} +03/05/2022 07:19:36 - INFO - codeparrot_training - Step 35610: {'lr': 0.00043902898234078223, 'samples': 18232832, 'steps': 35610, 'loss/train': 1.7619023323059082} +03/05/2022 07:19:39 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/05/2022 07:19:42 - INFO - codeparrot_training - Step 35611: {'lr': 0.000439025509364638, 'samples': 18233344, 'steps': 35611, 'loss/train': 0.6433806419372559} +03/05/2022 07:19:45 - INFO - codeparrot_training - Step 35612: {'lr': 0.0004390220363033217, 'samples': 18233856, 'steps': 35612, 'loss/train': 6.154721260070801} +03/05/2022 07:19:47 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/05/2022 07:19:50 - INFO - codeparrot_training - Step 35613: {'lr': 0.0004390185631568351, 'samples': 18234368, 'steps': 35613, 'loss/train': 1.7400320768356323} +03/05/2022 07:19:53 - INFO - codeparrot_training - Step 35614: {'lr': 0.00043901508992517956, 'samples': 18234880, 'steps': 35614, 'loss/train': 1.95522940158844} +03/05/2022 07:19:55 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 07:19:59 - INFO - codeparrot_training - Step 35615: {'lr': 0.0004390116166083568, 'samples': 18235392, 'steps': 35615, 'loss/train': 1.9352779388427734} +03/05/2022 07:20:02 - INFO - codeparrot_training - Step 35616: {'lr': 0.00043900814320636827, 'samples': 18235904, 'steps': 35616, 'loss/train': 0.711889922618866} +03/05/2022 07:20:04 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/05/2022 07:20:07 - INFO - codeparrot_training - Step 35617: {'lr': 0.00043900466971921563, 'samples': 18236416, 'steps': 35617, 'loss/train': 1.5717843770980835} +03/05/2022 07:20:10 - INFO - codeparrot_training - Step 35618: {'lr': 0.00043900119614690043, 'samples': 18236928, 'steps': 35618, 'loss/train': 1.1290003061294556} +03/05/2022 07:20:13 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/05/2022 07:20:16 - INFO - codeparrot_training - Step 35619: {'lr': 0.00043899772248942413, 'samples': 18237440, 'steps': 35619, 'loss/train': 1.0690252780914307} +03/05/2022 07:20:19 - INFO - codeparrot_training - Step 35620: {'lr': 0.0004389942487467884, 'samples': 18237952, 'steps': 35620, 'loss/train': 2.1186439990997314} +03/05/2022 07:20:23 - INFO - codeparrot_training - Step 35621: {'lr': 0.00043899077491899485, 'samples': 18238464, 'steps': 35621, 'loss/train': 2.1440682411193848} +03/05/2022 07:20:24 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 07:20:28 - INFO - codeparrot_training - Step 35622: {'lr': 0.0004389873010060449, 'samples': 18238976, 'steps': 35622, 'loss/train': 0.7072810530662537} +03/05/2022 07:20:31 - INFO - codeparrot_training - Step 35623: {'lr': 0.00043898382700794015, 'samples': 18239488, 'steps': 35623, 'loss/train': 1.4474983215332031} +03/05/2022 07:20:33 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 07:20:36 - INFO - codeparrot_training - Step 35624: {'lr': 0.0004389803529246823, 'samples': 18240000, 'steps': 35624, 'loss/train': 2.4290003776550293} +03/05/2022 07:20:40 - INFO - codeparrot_training - Step 35625: {'lr': 0.00043897687875627277, 'samples': 18240512, 'steps': 35625, 'loss/train': 1.6359623670578003} +03/05/2022 07:20:42 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/05/2022 07:20:45 - INFO - codeparrot_training - Step 35626: {'lr': 0.00043897340450271317, 'samples': 18241024, 'steps': 35626, 'loss/train': 0.7164183259010315} +03/05/2022 07:20:48 - INFO - codeparrot_training - Step 35627: {'lr': 0.0004389699301640051, 'samples': 18241536, 'steps': 35627, 'loss/train': 2.275535821914673} +03/05/2022 07:20:50 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/05/2022 07:20:53 - INFO - codeparrot_training - Step 35628: {'lr': 0.00043896645574015004, 'samples': 18242048, 'steps': 35628, 'loss/train': 2.32814359664917} +03/05/2022 07:20:57 - INFO - codeparrot_training - Step 35629: {'lr': 0.00043896298123114965, 'samples': 18242560, 'steps': 35629, 'loss/train': 1.890061378479004} +03/05/2022 07:20:58 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/05/2022 07:21:02 - INFO - codeparrot_training - Step 35630: {'lr': 0.00043895950663700546, 'samples': 18243072, 'steps': 35630, 'loss/train': 1.9485187530517578} +03/05/2022 07:21:05 - INFO - codeparrot_training - Step 35631: {'lr': 0.000438956031957719, 'samples': 18243584, 'steps': 35631, 'loss/train': 1.4140589237213135} +03/05/2022 07:21:07 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 07:21:10 - INFO - codeparrot_training - Step 35632: {'lr': 0.0004389525571932919, 'samples': 18244096, 'steps': 35632, 'loss/train': 2.2518224716186523} +03/05/2022 07:21:13 - INFO - codeparrot_training - Step 35633: {'lr': 0.00043894908234372564, 'samples': 18244608, 'steps': 35633, 'loss/train': 2.2509548664093018} +03/05/2022 07:21:15 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 07:21:19 - INFO - codeparrot_training - Step 35634: {'lr': 0.0004389456074090219, 'samples': 18245120, 'steps': 35634, 'loss/train': 1.710856318473816} +03/05/2022 07:21:22 - INFO - codeparrot_training - Step 35635: {'lr': 0.0004389421323891822, 'samples': 18245632, 'steps': 35635, 'loss/train': 0.9506700038909912} +03/05/2022 07:21:24 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/05/2022 07:21:27 - INFO - codeparrot_training - Step 35636: {'lr': 0.000438938657284208, 'samples': 18246144, 'steps': 35636, 'loss/train': 1.5618674755096436} +03/05/2022 07:21:30 - INFO - codeparrot_training - Step 35637: {'lr': 0.000438935182094101, 'samples': 18246656, 'steps': 35637, 'loss/train': 1.8619160652160645} +03/05/2022 07:21:33 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/05/2022 07:21:36 - INFO - codeparrot_training - Step 35638: {'lr': 0.0004389317068188628, 'samples': 18247168, 'steps': 35638, 'loss/train': 0.7068888545036316} +03/05/2022 07:21:39 - INFO - codeparrot_training - Step 35639: {'lr': 0.0004389282314584948, 'samples': 18247680, 'steps': 35639, 'loss/train': 2.058039426803589} +03/05/2022 07:21:41 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 07:21:44 - INFO - codeparrot_training - Step 35640: {'lr': 0.0004389247560129987, 'samples': 18248192, 'steps': 35640, 'loss/train': 2.0194778442382812} +03/05/2022 07:21:47 - INFO - codeparrot_training - Step 35641: {'lr': 0.000438921280482376, 'samples': 18248704, 'steps': 35641, 'loss/train': 2.144296169281006} +03/05/2022 07:21:49 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/05/2022 07:21:52 - INFO - codeparrot_training - Step 35642: {'lr': 0.00043891780486662825, 'samples': 18249216, 'steps': 35642, 'loss/train': 2.3053619861602783} +03/05/2022 07:21:55 - INFO - codeparrot_training - Step 35643: {'lr': 0.00043891432916575714, 'samples': 18249728, 'steps': 35643, 'loss/train': 1.8547847270965576} +03/05/2022 07:21:58 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/05/2022 07:22:01 - INFO - codeparrot_training - Step 35644: {'lr': 0.0004389108533797641, 'samples': 18250240, 'steps': 35644, 'loss/train': 2.2515838146209717} +03/05/2022 07:22:04 - INFO - codeparrot_training - Step 35645: {'lr': 0.00043890737750865074, 'samples': 18250752, 'steps': 35645, 'loss/train': 1.921485424041748} +03/05/2022 07:22:06 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 07:22:09 - INFO - codeparrot_training - Step 35646: {'lr': 0.0004389039015524186, 'samples': 18251264, 'steps': 35646, 'loss/train': 2.322957754135132} +03/05/2022 07:22:13 - INFO - codeparrot_training - Step 35647: {'lr': 0.0004389004255110693, 'samples': 18251776, 'steps': 35647, 'loss/train': 2.6784703731536865} +03/05/2022 07:22:15 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 07:22:18 - INFO - codeparrot_training - Step 35648: {'lr': 0.0004388969493846044, 'samples': 18252288, 'steps': 35648, 'loss/train': 1.5431900024414062} +03/05/2022 07:22:21 - INFO - codeparrot_training - Step 35649: {'lr': 0.00043889347317302543, 'samples': 18252800, 'steps': 35649, 'loss/train': 1.628823161125183} +03/05/2022 07:22:23 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/05/2022 07:22:26 - INFO - codeparrot_training - Step 35650: {'lr': 0.000438889996876334, 'samples': 18253312, 'steps': 35650, 'loss/train': 0.5356725454330444} +03/05/2022 07:22:29 - INFO - codeparrot_training - Step 35651: {'lr': 0.00043888652049453163, 'samples': 18253824, 'steps': 35651, 'loss/train': 1.8715674877166748} +03/05/2022 07:22:31 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/05/2022 07:22:35 - INFO - codeparrot_training - Step 35652: {'lr': 0.0004388830440276199, 'samples': 18254336, 'steps': 35652, 'loss/train': 1.2664936780929565} +03/05/2022 07:22:38 - INFO - codeparrot_training - Step 35653: {'lr': 0.0004388795674756004, 'samples': 18254848, 'steps': 35653, 'loss/train': 1.775356650352478} +03/05/2022 07:22:39 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/05/2022 07:22:43 - INFO - codeparrot_training - Step 35654: {'lr': 0.0004388760908384747, 'samples': 18255360, 'steps': 35654, 'loss/train': 0.935798704624176} +03/05/2022 07:22:46 - INFO - codeparrot_training - Step 35655: {'lr': 0.00043887261411624433, 'samples': 18255872, 'steps': 35655, 'loss/train': 1.4448611736297607} +03/05/2022 07:22:48 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/05/2022 07:22:52 - INFO - codeparrot_training - Step 35656: {'lr': 0.00043886913730891087, 'samples': 18256384, 'steps': 35656, 'loss/train': 2.3188467025756836} +03/05/2022 07:22:55 - INFO - codeparrot_training - Step 35657: {'lr': 0.00043886566041647593, 'samples': 18256896, 'steps': 35657, 'loss/train': 1.8766250610351562} +03/05/2022 07:22:56 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 07:23:00 - INFO - codeparrot_training - Step 35658: {'lr': 0.000438862183438941, 'samples': 18257408, 'steps': 35658, 'loss/train': 1.5627849102020264} +03/05/2022 07:23:03 - INFO - codeparrot_training - Step 35659: {'lr': 0.00043885870637630763, 'samples': 18257920, 'steps': 35659, 'loss/train': 1.8145678043365479} +03/05/2022 07:23:05 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 07:23:08 - INFO - codeparrot_training - Step 35660: {'lr': 0.00043885522922857757, 'samples': 18258432, 'steps': 35660, 'loss/train': 1.4663059711456299} +03/05/2022 07:23:12 - INFO - codeparrot_training - Step 35661: {'lr': 0.00043885175199575216, 'samples': 18258944, 'steps': 35661, 'loss/train': 1.478531837463379} +03/05/2022 07:23:13 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/05/2022 07:23:17 - INFO - codeparrot_training - Step 35662: {'lr': 0.00043884827467783303, 'samples': 18259456, 'steps': 35662, 'loss/train': 1.6084387302398682} +03/05/2022 07:23:20 - INFO - codeparrot_training - Step 35663: {'lr': 0.00043884479727482193, 'samples': 18259968, 'steps': 35663, 'loss/train': 1.3304052352905273} +03/05/2022 07:23:21 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/05/2022 07:23:25 - INFO - codeparrot_training - Step 35664: {'lr': 0.00043884131978672014, 'samples': 18260480, 'steps': 35664, 'loss/train': 1.6129413843154907} +03/05/2022 07:23:28 - INFO - codeparrot_training - Step 35665: {'lr': 0.00043883784221352947, 'samples': 18260992, 'steps': 35665, 'loss/train': 0.9870860576629639} +03/05/2022 07:23:30 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 07:23:34 - INFO - codeparrot_training - Step 35666: {'lr': 0.00043883436455525125, 'samples': 18261504, 'steps': 35666, 'loss/train': 1.9057637453079224} +03/05/2022 07:23:37 - INFO - codeparrot_training - Step 35667: {'lr': 0.0004388308868118873, 'samples': 18262016, 'steps': 35667, 'loss/train': 1.8430458307266235} +03/05/2022 07:23:38 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/05/2022 07:23:42 - INFO - codeparrot_training - Step 35668: {'lr': 0.00043882740898343905, 'samples': 18262528, 'steps': 35668, 'loss/train': 1.4988353252410889} +03/05/2022 07:23:45 - INFO - codeparrot_training - Step 35669: {'lr': 0.00043882393106990804, 'samples': 18263040, 'steps': 35669, 'loss/train': 0.7078030109405518} +03/05/2022 07:23:46 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 07:23:51 - INFO - codeparrot_training - Step 35670: {'lr': 0.0004388204530712959, 'samples': 18263552, 'steps': 35670, 'loss/train': 2.078819990158081} +03/05/2022 07:23:54 - INFO - codeparrot_training - Step 35671: {'lr': 0.0004388169749876042, 'samples': 18264064, 'steps': 35671, 'loss/train': 1.9951900243759155} +03/05/2022 07:23:55 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/05/2022 07:23:59 - INFO - codeparrot_training - Step 35672: {'lr': 0.0004388134968188344, 'samples': 18264576, 'steps': 35672, 'loss/train': 1.3254313468933105} +03/05/2022 07:24:02 - INFO - codeparrot_training - Step 35673: {'lr': 0.00043881001856498823, 'samples': 18265088, 'steps': 35673, 'loss/train': 1.6378891468048096} +03/05/2022 07:24:03 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/05/2022 07:24:07 - INFO - codeparrot_training - Step 35674: {'lr': 0.0004388065402260672, 'samples': 18265600, 'steps': 35674, 'loss/train': 1.0619419813156128} +03/05/2022 07:24:11 - INFO - codeparrot_training - Step 35675: {'lr': 0.0004388030618020729, 'samples': 18266112, 'steps': 35675, 'loss/train': 1.3701406717300415} +03/05/2022 07:24:11 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/05/2022 07:24:16 - INFO - codeparrot_training - Step 35676: {'lr': 0.0004387995832930067, 'samples': 18266624, 'steps': 35676, 'loss/train': 2.228940010070801} +03/05/2022 07:24:19 - INFO - codeparrot_training - Step 35677: {'lr': 0.00043879610469887043, 'samples': 18267136, 'steps': 35677, 'loss/train': 0.3754729628562927} +03/05/2022 07:24:20 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/05/2022 07:24:24 - INFO - codeparrot_training - Step 35678: {'lr': 0.00043879262601966544, 'samples': 18267648, 'steps': 35678, 'loss/train': 1.2399615049362183} +03/05/2022 07:24:27 - INFO - codeparrot_training - Step 35679: {'lr': 0.00043878914725539356, 'samples': 18268160, 'steps': 35679, 'loss/train': 2.027195930480957} +03/05/2022 07:24:28 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/05/2022 07:24:33 - INFO - codeparrot_training - Step 35680: {'lr': 0.00043878566840605606, 'samples': 18268672, 'steps': 35680, 'loss/train': 1.48850417137146} +03/05/2022 07:24:36 - INFO - codeparrot_training - Step 35681: {'lr': 0.0004387821894716547, 'samples': 18269184, 'steps': 35681, 'loss/train': 3.1597840785980225} +03/05/2022 07:24:37 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/05/2022 07:24:41 - INFO - codeparrot_training - Step 35682: {'lr': 0.000438778710452191, 'samples': 18269696, 'steps': 35682, 'loss/train': 1.5944234132766724} +03/05/2022 07:24:44 - INFO - codeparrot_training - Step 35683: {'lr': 0.00043877523134766664, 'samples': 18270208, 'steps': 35683, 'loss/train': 0.15570977330207825} +03/05/2022 07:24:45 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 07:24:50 - INFO - codeparrot_training - Step 35684: {'lr': 0.0004387717521580829, 'samples': 18270720, 'steps': 35684, 'loss/train': 2.090160608291626} +03/05/2022 07:24:53 - INFO - codeparrot_training - Step 35685: {'lr': 0.00043876827288344156, 'samples': 18271232, 'steps': 35685, 'loss/train': 0.8165982365608215} +03/05/2022 07:24:53 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 07:24:58 - INFO - codeparrot_training - Step 35686: {'lr': 0.00043876479352374423, 'samples': 18271744, 'steps': 35686, 'loss/train': 1.2597475051879883} +03/05/2022 07:25:01 - INFO - codeparrot_training - Step 35687: {'lr': 0.00043876131407899233, 'samples': 18272256, 'steps': 35687, 'loss/train': 1.1014387607574463} +03/05/2022 07:25:02 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/05/2022 07:25:07 - INFO - codeparrot_training - Step 35688: {'lr': 0.00043875783454918753, 'samples': 18272768, 'steps': 35688, 'loss/train': 1.2481452226638794} +03/05/2022 07:25:10 - INFO - codeparrot_training - Step 35689: {'lr': 0.00043875435493433135, 'samples': 18273280, 'steps': 35689, 'loss/train': 1.2008436918258667} +03/05/2022 07:25:10 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 07:25:15 - INFO - codeparrot_training - Step 35690: {'lr': 0.00043875087523442537, 'samples': 18273792, 'steps': 35690, 'loss/train': 2.7257983684539795} +03/05/2022 07:25:18 - INFO - codeparrot_training - Step 35691: {'lr': 0.0004387473954494712, 'samples': 18274304, 'steps': 35691, 'loss/train': 0.4964764714241028} +03/05/2022 07:25:18 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 07:25:23 - INFO - codeparrot_training - Step 35692: {'lr': 0.00043874391557947027, 'samples': 18274816, 'steps': 35692, 'loss/train': 2.1800122261047363} +03/05/2022 07:25:27 - INFO - codeparrot_training - Step 35693: {'lr': 0.0004387404356244243, 'samples': 18275328, 'steps': 35693, 'loss/train': 1.1795260906219482} +03/05/2022 07:25:27 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 07:25:32 - INFO - codeparrot_training - Step 35694: {'lr': 0.0004387369555843348, 'samples': 18275840, 'steps': 35694, 'loss/train': 2.9811110496520996} +03/05/2022 07:25:35 - INFO - codeparrot_training - Step 35695: {'lr': 0.00043873347545920333, 'samples': 18276352, 'steps': 35695, 'loss/train': 1.864378571510315} +03/05/2022 07:25:35 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/05/2022 07:25:40 - INFO - codeparrot_training - Step 35696: {'lr': 0.00043872999524903147, 'samples': 18276864, 'steps': 35696, 'loss/train': 1.05726158618927} +03/05/2022 07:25:44 - INFO - codeparrot_training - Step 35697: {'lr': 0.00043872651495382076, 'samples': 18277376, 'steps': 35697, 'loss/train': 1.5211248397827148} +03/05/2022 07:25:44 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/05/2022 07:25:49 - INFO - codeparrot_training - Step 35698: {'lr': 0.00043872303457357287, 'samples': 18277888, 'steps': 35698, 'loss/train': 0.8609614372253418} +03/05/2022 07:25:52 - INFO - codeparrot_training - Step 35699: {'lr': 0.0004387195541082892, 'samples': 18278400, 'steps': 35699, 'loss/train': 2.2257816791534424} +03/05/2022 07:25:52 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/05/2022 07:25:57 - INFO - codeparrot_training - Step 35700: {'lr': 0.0004387160735579715, 'samples': 18278912, 'steps': 35700, 'loss/train': 0.5324477553367615} +03/05/2022 07:26:00 - INFO - codeparrot_training - Step 35701: {'lr': 0.0004387125929226212, 'samples': 18279424, 'steps': 35701, 'loss/train': 1.7016823291778564} +03/05/2022 07:26:01 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 07:26:06 - INFO - codeparrot_training - Step 35702: {'lr': 0.00043870911220224, 'samples': 18279936, 'steps': 35702, 'loss/train': 2.3088417053222656} +03/05/2022 07:26:09 - INFO - codeparrot_training - Step 35703: {'lr': 0.0004387056313968293, 'samples': 18280448, 'steps': 35703, 'loss/train': 1.4109809398651123} +03/05/2022 07:26:09 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/05/2022 07:26:14 - INFO - codeparrot_training - Step 35704: {'lr': 0.00043870215050639073, 'samples': 18280960, 'steps': 35704, 'loss/train': 0.6389027237892151} +03/05/2022 07:26:17 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/05/2022 07:26:19 - INFO - codeparrot_training - Step 35705: {'lr': 0.00043869866953092593, 'samples': 18281472, 'steps': 35705, 'loss/train': 1.3436239957809448} +03/05/2022 07:26:22 - INFO - codeparrot_training - Step 35706: {'lr': 0.00043869518847043643, 'samples': 18281984, 'steps': 35706, 'loss/train': 0.90720534324646} +03/05/2022 07:26:25 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 07:26:28 - INFO - codeparrot_training - Step 35707: {'lr': 0.0004386917073249237, 'samples': 18282496, 'steps': 35707, 'loss/train': 1.7344626188278198} +03/05/2022 07:26:31 - INFO - codeparrot_training - Step 35708: {'lr': 0.00043868822609438953, 'samples': 18283008, 'steps': 35708, 'loss/train': 1.26978600025177} +03/05/2022 07:26:33 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/05/2022 07:26:36 - INFO - codeparrot_training - Step 35709: {'lr': 0.00043868474477883523, 'samples': 18283520, 'steps': 35709, 'loss/train': 1.5480319261550903} +03/05/2022 07:26:39 - INFO - codeparrot_training - Step 35710: {'lr': 0.0004386812633782626, 'samples': 18284032, 'steps': 35710, 'loss/train': 2.3695502281188965} +03/05/2022 07:26:41 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/05/2022 07:26:45 - INFO - codeparrot_training - Step 35711: {'lr': 0.00043867778189267306, 'samples': 18284544, 'steps': 35711, 'loss/train': 0.054140251129865646} +03/05/2022 07:26:48 - INFO - codeparrot_training - Step 35712: {'lr': 0.0004386743003220682, 'samples': 18285056, 'steps': 35712, 'loss/train': 2.1086080074310303} +03/05/2022 07:26:50 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 07:26:53 - INFO - codeparrot_training - Step 35713: {'lr': 0.0004386708186664496, 'samples': 18285568, 'steps': 35713, 'loss/train': 1.4662047624588013} +03/05/2022 07:26:56 - INFO - codeparrot_training - Step 35714: {'lr': 0.00043866733692581896, 'samples': 18286080, 'steps': 35714, 'loss/train': 2.4179627895355225} +03/05/2022 07:26:59 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 07:27:01 - INFO - codeparrot_training - Step 35715: {'lr': 0.0004386638551001777, 'samples': 18286592, 'steps': 35715, 'loss/train': 1.7502223253250122} +03/05/2022 07:27:05 - INFO - codeparrot_training - Step 35716: {'lr': 0.00043866037318952735, 'samples': 18287104, 'steps': 35716, 'loss/train': 1.7414695024490356} +03/05/2022 07:27:07 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/05/2022 07:27:10 - INFO - codeparrot_training - Step 35717: {'lr': 0.0004386568911938695, 'samples': 18287616, 'steps': 35717, 'loss/train': 1.2179512977600098} +03/05/2022 07:27:13 - INFO - codeparrot_training - Step 35718: {'lr': 0.0004386534091132059, 'samples': 18288128, 'steps': 35718, 'loss/train': 2.3164379596710205} +03/05/2022 07:27:15 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 07:27:18 - INFO - codeparrot_training - Step 35719: {'lr': 0.0004386499269475379, 'samples': 18288640, 'steps': 35719, 'loss/train': 1.5551913976669312} +03/05/2022 07:27:22 - INFO - codeparrot_training - Step 35720: {'lr': 0.00043864644469686717, 'samples': 18289152, 'steps': 35720, 'loss/train': 1.8597856760025024} +03/05/2022 07:27:24 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/05/2022 07:27:27 - INFO - codeparrot_training - Step 35721: {'lr': 0.0004386429623611953, 'samples': 18289664, 'steps': 35721, 'loss/train': 1.6464550495147705} +03/05/2022 07:27:30 - INFO - codeparrot_training - Step 35722: {'lr': 0.0004386394799405238, 'samples': 18290176, 'steps': 35722, 'loss/train': 0.6548097133636475} +03/05/2022 07:27:32 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/05/2022 07:27:35 - INFO - codeparrot_training - Step 35723: {'lr': 0.00043863599743485416, 'samples': 18290688, 'steps': 35723, 'loss/train': 2.060009479522705} +03/05/2022 07:27:38 - INFO - codeparrot_training - Step 35724: {'lr': 0.0004386325148441882, 'samples': 18291200, 'steps': 35724, 'loss/train': 1.272027850151062} +03/05/2022 07:27:40 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/05/2022 07:27:44 - INFO - codeparrot_training - Step 35725: {'lr': 0.00043862903216852723, 'samples': 18291712, 'steps': 35725, 'loss/train': 0.8226000666618347} +03/05/2022 07:27:47 - INFO - codeparrot_training - Step 35726: {'lr': 0.00043862554940787303, 'samples': 18292224, 'steps': 35726, 'loss/train': 1.268876314163208} +03/05/2022 07:27:49 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/05/2022 07:27:52 - INFO - codeparrot_training - Step 35727: {'lr': 0.000438622066562227, 'samples': 18292736, 'steps': 35727, 'loss/train': 0.7049694061279297} +03/05/2022 07:27:55 - INFO - codeparrot_training - Step 35728: {'lr': 0.0004386185836315908, 'samples': 18293248, 'steps': 35728, 'loss/train': 1.185179352760315} +03/05/2022 07:27:57 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 07:28:00 - INFO - codeparrot_training - Step 35729: {'lr': 0.0004386151006159659, 'samples': 18293760, 'steps': 35729, 'loss/train': 3.206468105316162} +03/05/2022 07:28:04 - INFO - codeparrot_training - Step 35730: {'lr': 0.00043861161751535406, 'samples': 18294272, 'steps': 35730, 'loss/train': 0.8603824973106384} +03/05/2022 07:28:05 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/05/2022 07:28:09 - INFO - codeparrot_training - Step 35731: {'lr': 0.0004386081343297567, 'samples': 18294784, 'steps': 35731, 'loss/train': 2.553220272064209} +03/05/2022 07:28:12 - INFO - codeparrot_training - Step 35732: {'lr': 0.0004386046510591754, 'samples': 18295296, 'steps': 35732, 'loss/train': 2.065293312072754} +03/05/2022 07:28:14 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 07:28:17 - INFO - codeparrot_training - Step 35733: {'lr': 0.0004386011677036118, 'samples': 18295808, 'steps': 35733, 'loss/train': 2.0429577827453613} +03/05/2022 07:28:20 - INFO - codeparrot_training - Step 35734: {'lr': 0.00043859768426306737, 'samples': 18296320, 'steps': 35734, 'loss/train': 1.7138265371322632} +03/05/2022 07:28:22 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/05/2022 07:28:26 - INFO - codeparrot_training - Step 35735: {'lr': 0.00043859420073754377, 'samples': 18296832, 'steps': 35735, 'loss/train': 1.628240704536438} +03/05/2022 07:28:29 - INFO - codeparrot_training - Step 35736: {'lr': 0.0004385907171270425, 'samples': 18297344, 'steps': 35736, 'loss/train': 1.752416968345642} +03/05/2022 07:28:30 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 07:28:34 - INFO - codeparrot_training - Step 35737: {'lr': 0.00043858723343156514, 'samples': 18297856, 'steps': 35737, 'loss/train': 1.7866249084472656} +03/05/2022 07:28:37 - INFO - codeparrot_training - Step 35738: {'lr': 0.00043858374965111336, 'samples': 18298368, 'steps': 35738, 'loss/train': 0.9406494498252869} +03/05/2022 07:28:39 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 07:28:43 - INFO - codeparrot_training - Step 35739: {'lr': 0.00043858026578568864, 'samples': 18298880, 'steps': 35739, 'loss/train': 2.0512239933013916} +03/05/2022 07:28:46 - INFO - codeparrot_training - Step 35740: {'lr': 0.00043857678183529256, 'samples': 18299392, 'steps': 35740, 'loss/train': 1.4369585514068604} +03/05/2022 07:28:47 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 07:28:51 - INFO - codeparrot_training - Step 35741: {'lr': 0.0004385732977999266, 'samples': 18299904, 'steps': 35741, 'loss/train': 1.4000229835510254} +03/05/2022 07:28:54 - INFO - codeparrot_training - Step 35742: {'lr': 0.0004385698136795926, 'samples': 18300416, 'steps': 35742, 'loss/train': 2.050995349884033} +03/05/2022 07:28:55 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/05/2022 07:28:59 - INFO - codeparrot_training - Step 35743: {'lr': 0.00043856632947429175, 'samples': 18300928, 'steps': 35743, 'loss/train': 0.8691539764404297} +03/05/2022 07:29:03 - INFO - codeparrot_training - Step 35744: {'lr': 0.00043856284518402594, 'samples': 18301440, 'steps': 35744, 'loss/train': 1.6727403402328491} +03/05/2022 07:29:04 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 07:29:08 - INFO - codeparrot_training - Step 35745: {'lr': 0.00043855936080879667, 'samples': 18301952, 'steps': 35745, 'loss/train': 2.5223805904388428} +03/05/2022 07:29:11 - INFO - codeparrot_training - Step 35746: {'lr': 0.0004385558763486053, 'samples': 18302464, 'steps': 35746, 'loss/train': 2.0352156162261963} +03/05/2022 07:29:12 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/05/2022 07:29:16 - INFO - codeparrot_training - Step 35747: {'lr': 0.00043855239180345376, 'samples': 18302976, 'steps': 35747, 'loss/train': 1.8072291612625122} +03/05/2022 07:29:19 - INFO - codeparrot_training - Step 35748: {'lr': 0.00043854890717334326, 'samples': 18303488, 'steps': 35748, 'loss/train': 1.5871286392211914} +03/05/2022 07:29:21 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/05/2022 07:29:25 - INFO - codeparrot_training - Step 35749: {'lr': 0.00043854542245827554, 'samples': 18304000, 'steps': 35749, 'loss/train': 1.2172420024871826} +03/05/2022 07:29:28 - INFO - codeparrot_training - Step 35750: {'lr': 0.00043854193765825223, 'samples': 18304512, 'steps': 35750, 'loss/train': 1.8148852586746216} +03/05/2022 07:29:29 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/05/2022 07:29:33 - INFO - codeparrot_training - Step 35751: {'lr': 0.00043853845277327485, 'samples': 18305024, 'steps': 35751, 'loss/train': 2.3539435863494873} +03/05/2022 07:29:36 - INFO - codeparrot_training - Step 35752: {'lr': 0.0004385349678033449, 'samples': 18305536, 'steps': 35752, 'loss/train': 1.6744474172592163} +03/05/2022 07:29:37 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 07:29:42 - INFO - codeparrot_training - Step 35753: {'lr': 0.000438531482748464, 'samples': 18306048, 'steps': 35753, 'loss/train': 1.3685208559036255} +03/05/2022 07:29:45 - INFO - codeparrot_training - Step 35754: {'lr': 0.00043852799760863375, 'samples': 18306560, 'steps': 35754, 'loss/train': 1.461417555809021} +03/05/2022 07:29:46 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/05/2022 07:29:50 - INFO - codeparrot_training - Step 35755: {'lr': 0.0004385245123838557, 'samples': 18307072, 'steps': 35755, 'loss/train': 1.2356294393539429} +03/05/2022 07:29:53 - INFO - codeparrot_training - Step 35756: {'lr': 0.00043852102707413144, 'samples': 18307584, 'steps': 35756, 'loss/train': 1.5272518396377563} +03/05/2022 07:29:54 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/05/2022 07:29:58 - INFO - codeparrot_training - Step 35757: {'lr': 0.00043851754167946244, 'samples': 18308096, 'steps': 35757, 'loss/train': 1.9455064535140991} +03/05/2022 07:30:02 - INFO - codeparrot_training - Step 35758: {'lr': 0.00043851405619985037, 'samples': 18308608, 'steps': 35758, 'loss/train': 0.501350998878479} +03/05/2022 07:30:03 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/05/2022 07:30:07 - INFO - codeparrot_training - Step 35759: {'lr': 0.00043851057063529675, 'samples': 18309120, 'steps': 35759, 'loss/train': 1.8252297639846802} +03/05/2022 07:30:10 - INFO - codeparrot_training - Step 35760: {'lr': 0.00043850708498580326, 'samples': 18309632, 'steps': 35760, 'loss/train': 1.6695834398269653} +03/05/2022 07:30:11 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/05/2022 07:30:16 - INFO - codeparrot_training - Step 35761: {'lr': 0.00043850359925137126, 'samples': 18310144, 'steps': 35761, 'loss/train': 1.1442546844482422} +03/05/2022 07:30:19 - INFO - codeparrot_training - Step 35762: {'lr': 0.0004385001134320026, 'samples': 18310656, 'steps': 35762, 'loss/train': 3.042152166366577} +03/05/2022 07:30:22 - INFO - codeparrot_training - Step 35763: {'lr': 0.0004384966275276986, 'samples': 18311168, 'steps': 35763, 'loss/train': 1.663593053817749} +03/05/2022 07:30:22 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 07:30:27 - INFO - codeparrot_training - Step 35764: {'lr': 0.00043849314153846094, 'samples': 18311680, 'steps': 35764, 'loss/train': 1.6578134298324585} +03/05/2022 07:30:30 - INFO - codeparrot_training - Step 35765: {'lr': 0.0004384896554642912, 'samples': 18312192, 'steps': 35765, 'loss/train': 1.4540376663208008} +03/05/2022 07:30:30 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/05/2022 07:30:36 - INFO - codeparrot_training - Step 35766: {'lr': 0.00043848616930519094, 'samples': 18312704, 'steps': 35766, 'loss/train': 2.3329668045043945} +03/05/2022 07:30:38 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 07:30:41 - INFO - codeparrot_training - Step 35767: {'lr': 0.0004384826830611617, 'samples': 18313216, 'steps': 35767, 'loss/train': 1.8129961490631104} +03/05/2022 07:30:44 - INFO - codeparrot_training - Step 35768: {'lr': 0.00043847919673220504, 'samples': 18313728, 'steps': 35768, 'loss/train': 1.4120835065841675} +03/05/2022 07:30:47 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 07:30:49 - INFO - codeparrot_training - Step 35769: {'lr': 0.00043847571031832257, 'samples': 18314240, 'steps': 35769, 'loss/train': 1.8716576099395752} +03/05/2022 07:30:53 - INFO - codeparrot_training - Step 35770: {'lr': 0.0004384722238195159, 'samples': 18314752, 'steps': 35770, 'loss/train': 1.860042691230774} +03/05/2022 07:30:55 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 07:30:58 - INFO - codeparrot_training - Step 35771: {'lr': 0.0004384687372357865, 'samples': 18315264, 'steps': 35771, 'loss/train': 1.718870759010315} +03/05/2022 07:31:01 - INFO - codeparrot_training - Step 35772: {'lr': 0.000438465250567136, 'samples': 18315776, 'steps': 35772, 'loss/train': 1.545990228652954} +03/05/2022 07:31:03 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/05/2022 07:31:06 - INFO - codeparrot_training - Step 35773: {'lr': 0.00043846176381356607, 'samples': 18316288, 'steps': 35773, 'loss/train': 1.1574318408966064} +03/05/2022 07:31:09 - INFO - codeparrot_training - Step 35774: {'lr': 0.000438458276975078, 'samples': 18316800, 'steps': 35774, 'loss/train': 1.547105312347412} +03/05/2022 07:31:12 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 07:31:15 - INFO - codeparrot_training - Step 35775: {'lr': 0.0004384547900516737, 'samples': 18317312, 'steps': 35775, 'loss/train': 1.7088549137115479} +03/05/2022 07:31:18 - INFO - codeparrot_training - Step 35776: {'lr': 0.00043845130304335454, 'samples': 18317824, 'steps': 35776, 'loss/train': 2.0230493545532227} +03/05/2022 07:31:20 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/05/2022 07:31:23 - INFO - codeparrot_training - Step 35777: {'lr': 0.00043844781595012204, 'samples': 18318336, 'steps': 35777, 'loss/train': 0.9689851403236389} +03/05/2022 07:31:26 - INFO - codeparrot_training - Step 35778: {'lr': 0.0004384443287719779, 'samples': 18318848, 'steps': 35778, 'loss/train': 2.340423107147217} +03/05/2022 07:31:28 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/05/2022 07:31:31 - INFO - codeparrot_training - Step 35779: {'lr': 0.0004384408415089237, 'samples': 18319360, 'steps': 35779, 'loss/train': 2.063995361328125} +03/05/2022 07:31:35 - INFO - codeparrot_training - Step 35780: {'lr': 0.000438437354160961, 'samples': 18319872, 'steps': 35780, 'loss/train': 0.8279281854629517} +03/05/2022 07:31:37 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/05/2022 07:31:40 - INFO - codeparrot_training - Step 35781: {'lr': 0.00043843386672809127, 'samples': 18320384, 'steps': 35781, 'loss/train': 1.667568564414978} +03/05/2022 07:31:43 - INFO - codeparrot_training - Step 35782: {'lr': 0.00043843037921031616, 'samples': 18320896, 'steps': 35782, 'loss/train': 1.1954513788223267} +03/05/2022 07:31:45 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/05/2022 07:31:49 - INFO - codeparrot_training - Step 35783: {'lr': 0.00043842689160763723, 'samples': 18321408, 'steps': 35783, 'loss/train': 2.2057087421417236} +03/05/2022 07:31:52 - INFO - codeparrot_training - Step 35784: {'lr': 0.00043842340392005605, 'samples': 18321920, 'steps': 35784, 'loss/train': 0.9788277745246887} +03/05/2022 07:31:54 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 07:31:57 - INFO - codeparrot_training - Step 35785: {'lr': 0.00043841991614757415, 'samples': 18322432, 'steps': 35785, 'loss/train': 1.2413392066955566} +03/05/2022 07:32:00 - INFO - codeparrot_training - Step 35786: {'lr': 0.00043841642829019325, 'samples': 18322944, 'steps': 35786, 'loss/train': 1.8858823776245117} +03/05/2022 07:32:02 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 07:32:05 - INFO - codeparrot_training - Step 35787: {'lr': 0.00043841294034791466, 'samples': 18323456, 'steps': 35787, 'loss/train': 1.3625431060791016} +03/05/2022 07:32:08 - INFO - codeparrot_training - Step 35788: {'lr': 0.0004384094523207403, 'samples': 18323968, 'steps': 35788, 'loss/train': 2.3667798042297363} +03/05/2022 07:32:10 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/05/2022 07:32:14 - INFO - codeparrot_training - Step 35789: {'lr': 0.0004384059642086714, 'samples': 18324480, 'steps': 35789, 'loss/train': 2.4646449089050293} +03/05/2022 07:32:17 - INFO - codeparrot_training - Step 35790: {'lr': 0.00043840247601170966, 'samples': 18324992, 'steps': 35790, 'loss/train': 1.4158660173416138} +03/05/2022 07:32:19 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/05/2022 07:32:22 - INFO - codeparrot_training - Step 35791: {'lr': 0.0004383989877298568, 'samples': 18325504, 'steps': 35791, 'loss/train': 1.6519218683242798} +03/05/2022 07:32:25 - INFO - codeparrot_training - Step 35792: {'lr': 0.0004383954993631142, 'samples': 18326016, 'steps': 35792, 'loss/train': 2.056934118270874} +03/05/2022 07:32:27 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/05/2022 07:32:31 - INFO - codeparrot_training - Step 35793: {'lr': 0.0004383920109114835, 'samples': 18326528, 'steps': 35793, 'loss/train': 1.6480445861816406} +03/05/2022 07:32:34 - INFO - codeparrot_training - Step 35794: {'lr': 0.00043838852237496626, 'samples': 18327040, 'steps': 35794, 'loss/train': 1.8572678565979004} +03/05/2022 07:32:35 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/05/2022 07:32:39 - INFO - codeparrot_training - Step 35795: {'lr': 0.000438385033753564, 'samples': 18327552, 'steps': 35795, 'loss/train': 1.688754677772522} +03/05/2022 07:32:42 - INFO - codeparrot_training - Step 35796: {'lr': 0.00043838154504727847, 'samples': 18328064, 'steps': 35796, 'loss/train': 1.3485699892044067} +03/05/2022 07:32:44 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/05/2022 07:32:47 - INFO - codeparrot_training - Step 35797: {'lr': 0.00043837805625611105, 'samples': 18328576, 'steps': 35797, 'loss/train': 1.211952567100525} +03/05/2022 07:32:51 - INFO - codeparrot_training - Step 35798: {'lr': 0.0004383745673800634, 'samples': 18329088, 'steps': 35798, 'loss/train': 2.014723300933838} +03/05/2022 07:32:52 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/05/2022 07:32:56 - INFO - codeparrot_training - Step 35799: {'lr': 0.000438371078419137, 'samples': 18329600, 'steps': 35799, 'loss/train': 1.5385650396347046} +03/05/2022 07:32:59 - INFO - codeparrot_training - Step 35800: {'lr': 0.00043836758937333366, 'samples': 18330112, 'steps': 35800, 'loss/train': 1.8218685388565063} +03/05/2022 07:33:01 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 07:33:04 - INFO - codeparrot_training - Step 35801: {'lr': 0.0004383641002426547, 'samples': 18330624, 'steps': 35801, 'loss/train': 2.1332199573516846} +03/05/2022 07:33:07 - INFO - codeparrot_training - Step 35802: {'lr': 0.0004383606110271018, 'samples': 18331136, 'steps': 35802, 'loss/train': 1.9530038833618164} +03/05/2022 07:33:09 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 07:33:13 - INFO - codeparrot_training - Step 35803: {'lr': 0.00043835712172667643, 'samples': 18331648, 'steps': 35803, 'loss/train': 1.552128791809082} +03/05/2022 07:33:16 - INFO - codeparrot_training - Step 35804: {'lr': 0.00043835363234138037, 'samples': 18332160, 'steps': 35804, 'loss/train': 1.414260983467102} +03/05/2022 07:33:17 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 07:33:22 - INFO - codeparrot_training - Step 35805: {'lr': 0.00043835014287121497, 'samples': 18332672, 'steps': 35805, 'loss/train': 1.9430971145629883} +03/05/2022 07:33:25 - INFO - codeparrot_training - Step 35806: {'lr': 0.00043834665331618196, 'samples': 18333184, 'steps': 35806, 'loss/train': 2.01493763923645} +03/05/2022 07:33:28 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/05/2022 07:33:30 - INFO - codeparrot_training - Step 35807: {'lr': 0.00043834316367628287, 'samples': 18333696, 'steps': 35807, 'loss/train': 2.5154149532318115} +03/05/2022 07:33:33 - INFO - codeparrot_training - Step 35808: {'lr': 0.0004383396739515192, 'samples': 18334208, 'steps': 35808, 'loss/train': 1.9666022062301636} +03/05/2022 07:33:36 - INFO - codeparrot_training - Step 35809: {'lr': 0.00043833618414189265, 'samples': 18334720, 'steps': 35809, 'loss/train': 2.1317696571350098} +03/05/2022 07:33:36 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 07:33:42 - INFO - codeparrot_training - Step 35810: {'lr': 0.0004383326942474046, 'samples': 18335232, 'steps': 35810, 'loss/train': 1.656234622001648} +03/05/2022 07:33:45 - INFO - codeparrot_training - Step 35811: {'lr': 0.0004383292042680569, 'samples': 18335744, 'steps': 35811, 'loss/train': 1.6465381383895874} +03/05/2022 07:33:45 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/05/2022 07:33:50 - INFO - codeparrot_training - Step 35812: {'lr': 0.0004383257142038509, 'samples': 18336256, 'steps': 35812, 'loss/train': 0.8864298462867737} +03/05/2022 07:33:53 - INFO - codeparrot_training - Step 35813: {'lr': 0.0004383222240547882, 'samples': 18336768, 'steps': 35813, 'loss/train': 1.8521597385406494} +03/05/2022 07:33:54 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 07:33:59 - INFO - codeparrot_training - Step 35814: {'lr': 0.00043831873382087043, 'samples': 18337280, 'steps': 35814, 'loss/train': 1.9385435581207275} +03/05/2022 07:34:02 - INFO - codeparrot_training - Step 35815: {'lr': 0.0004383152435020992, 'samples': 18337792, 'steps': 35815, 'loss/train': 0.9484081268310547} +03/05/2022 07:34:02 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/05/2022 07:34:08 - INFO - codeparrot_training - Step 35816: {'lr': 0.0004383117530984759, 'samples': 18338304, 'steps': 35816, 'loss/train': 2.141054630279541} +03/05/2022 07:34:11 - INFO - codeparrot_training - Step 35817: {'lr': 0.0004383082626100024, 'samples': 18338816, 'steps': 35817, 'loss/train': 1.7957396507263184} +03/05/2022 07:34:13 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 07:34:16 - INFO - codeparrot_training - Step 35818: {'lr': 0.00043830477203668, 'samples': 18339328, 'steps': 35818, 'loss/train': 1.8486052751541138} +03/05/2022 07:34:19 - INFO - codeparrot_training - Step 35819: {'lr': 0.0004383012813785104, 'samples': 18339840, 'steps': 35819, 'loss/train': 1.1866755485534668} +03/05/2022 07:34:21 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 07:34:24 - INFO - codeparrot_training - Step 35820: {'lr': 0.00043829779063549515, 'samples': 18340352, 'steps': 35820, 'loss/train': 1.4330103397369385} +03/05/2022 07:34:28 - INFO - codeparrot_training - Step 35821: {'lr': 0.0004382942998076358, 'samples': 18340864, 'steps': 35821, 'loss/train': 1.8692923784255981} +03/05/2022 07:34:30 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/05/2022 07:34:33 - INFO - codeparrot_training - Step 35822: {'lr': 0.000438290808894934, 'samples': 18341376, 'steps': 35822, 'loss/train': 1.7344541549682617} +03/05/2022 07:34:36 - INFO - codeparrot_training - Step 35823: {'lr': 0.0004382873178973912, 'samples': 18341888, 'steps': 35823, 'loss/train': 1.2696970701217651} +03/05/2022 07:34:38 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 07:34:41 - INFO - codeparrot_training - Step 35824: {'lr': 0.00043828382681500907, 'samples': 18342400, 'steps': 35824, 'loss/train': 1.3480753898620605} +03/05/2022 07:34:45 - INFO - codeparrot_training - Step 35825: {'lr': 0.0004382803356477891, 'samples': 18342912, 'steps': 35825, 'loss/train': 1.7241772413253784} +03/05/2022 07:34:46 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/05/2022 07:34:50 - INFO - codeparrot_training - Step 35826: {'lr': 0.000438276844395733, 'samples': 18343424, 'steps': 35826, 'loss/train': 1.0532060861587524} +03/05/2022 07:34:53 - INFO - codeparrot_training - Step 35827: {'lr': 0.0004382733530588422, 'samples': 18343936, 'steps': 35827, 'loss/train': 1.5139710903167725} +03/05/2022 07:34:55 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/05/2022 07:34:58 - INFO - codeparrot_training - Step 35828: {'lr': 0.00043826986163711835, 'samples': 18344448, 'steps': 35828, 'loss/train': 0.5515784621238708} +03/05/2022 07:35:02 - INFO - codeparrot_training - Step 35829: {'lr': 0.000438266370130563, 'samples': 18344960, 'steps': 35829, 'loss/train': 2.2003138065338135} +03/05/2022 07:35:03 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/05/2022 07:35:07 - INFO - codeparrot_training - Step 35830: {'lr': 0.0004382628785391778, 'samples': 18345472, 'steps': 35830, 'loss/train': 1.5054694414138794} +03/05/2022 07:35:10 - INFO - codeparrot_training - Step 35831: {'lr': 0.00043825938686296417, 'samples': 18345984, 'steps': 35831, 'loss/train': 0.11516143381595612} +03/05/2022 07:35:12 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 07:35:15 - INFO - codeparrot_training - Step 35832: {'lr': 0.00043825589510192376, 'samples': 18346496, 'steps': 35832, 'loss/train': 6.5704755783081055} +03/05/2022 07:35:18 - INFO - codeparrot_training - Step 35833: {'lr': 0.0004382524032560582, 'samples': 18347008, 'steps': 35833, 'loss/train': 1.3933088779449463} +03/05/2022 07:35:20 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 07:35:24 - INFO - codeparrot_training - Step 35834: {'lr': 0.000438248911325369, 'samples': 18347520, 'steps': 35834, 'loss/train': 1.6931428909301758} +03/05/2022 07:35:27 - INFO - codeparrot_training - Step 35835: {'lr': 0.00043824541930985775, 'samples': 18348032, 'steps': 35835, 'loss/train': 1.0527546405792236} +03/05/2022 07:35:28 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 07:35:32 - INFO - codeparrot_training - Step 35836: {'lr': 0.0004382419272095259, 'samples': 18348544, 'steps': 35836, 'loss/train': 1.5860486030578613} +03/05/2022 07:35:35 - INFO - codeparrot_training - Step 35837: {'lr': 0.00043823843502437533, 'samples': 18349056, 'steps': 35837, 'loss/train': 2.004103183746338} +03/05/2022 07:35:37 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/05/2022 07:35:40 - INFO - codeparrot_training - Step 35838: {'lr': 0.00043823494275440733, 'samples': 18349568, 'steps': 35838, 'loss/train': 2.264207124710083} +03/05/2022 07:35:44 - INFO - codeparrot_training - Step 35839: {'lr': 0.0004382314503996236, 'samples': 18350080, 'steps': 35839, 'loss/train': 1.4864553213119507} +03/05/2022 07:35:45 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 07:35:49 - INFO - codeparrot_training - Step 35840: {'lr': 0.0004382279579600256, 'samples': 18350592, 'steps': 35840, 'loss/train': 1.1642504930496216} +03/05/2022 07:35:52 - INFO - codeparrot_training - Step 35841: {'lr': 0.0004382244654356151, 'samples': 18351104, 'steps': 35841, 'loss/train': 0.34490540623664856} +03/05/2022 07:35:53 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 07:35:57 - INFO - codeparrot_training - Step 35842: {'lr': 0.0004382209728263935, 'samples': 18351616, 'steps': 35842, 'loss/train': 3.0427041053771973} +03/05/2022 07:36:01 - INFO - codeparrot_training - Step 35843: {'lr': 0.0004382174801323624, 'samples': 18352128, 'steps': 35843, 'loss/train': 1.7474128007888794} +03/05/2022 07:36:02 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/05/2022 07:36:06 - INFO - codeparrot_training - Step 35844: {'lr': 0.00043821398735352344, 'samples': 18352640, 'steps': 35844, 'loss/train': 1.0553959608078003} +03/05/2022 07:36:09 - INFO - codeparrot_training - Step 35845: {'lr': 0.0004382104944898782, 'samples': 18353152, 'steps': 35845, 'loss/train': 1.8082078695297241} +03/05/2022 07:36:11 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/05/2022 07:36:14 - INFO - codeparrot_training - Step 35846: {'lr': 0.00043820700154142825, 'samples': 18353664, 'steps': 35846, 'loss/train': 1.3046081066131592} +03/05/2022 07:36:17 - INFO - codeparrot_training - Step 35847: {'lr': 0.00043820350850817504, 'samples': 18354176, 'steps': 35847, 'loss/train': 0.22505730390548706} +03/05/2022 07:36:19 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/05/2022 07:36:23 - INFO - codeparrot_training - Step 35848: {'lr': 0.00043820001539012025, 'samples': 18354688, 'steps': 35848, 'loss/train': 0.8354507684707642} +03/05/2022 07:36:26 - INFO - codeparrot_training - Step 35849: {'lr': 0.00043819652218726545, 'samples': 18355200, 'steps': 35849, 'loss/train': 1.2859764099121094} +03/05/2022 07:36:27 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 07:36:31 - INFO - codeparrot_training - Step 35850: {'lr': 0.0004381930288996122, 'samples': 18355712, 'steps': 35850, 'loss/train': 1.8417073488235474} +03/05/2022 07:36:34 - INFO - codeparrot_training - Step 35851: {'lr': 0.0004381895355271621, 'samples': 18356224, 'steps': 35851, 'loss/train': 2.1338348388671875} +03/05/2022 07:36:36 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/05/2022 07:36:39 - INFO - codeparrot_training - Step 35852: {'lr': 0.00043818604206991664, 'samples': 18356736, 'steps': 35852, 'loss/train': 1.8656824827194214} +03/05/2022 07:36:43 - INFO - codeparrot_training - Step 35853: {'lr': 0.0004381825485278775, 'samples': 18357248, 'steps': 35853, 'loss/train': 0.5284648537635803} +03/05/2022 07:36:44 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 07:36:48 - INFO - codeparrot_training - Step 35854: {'lr': 0.00043817905490104613, 'samples': 18357760, 'steps': 35854, 'loss/train': 1.9920260906219482} +03/05/2022 07:36:51 - INFO - codeparrot_training - Step 35855: {'lr': 0.00043817556118942426, 'samples': 18358272, 'steps': 35855, 'loss/train': 2.0864834785461426} +03/05/2022 07:36:53 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/05/2022 07:36:56 - INFO - codeparrot_training - Step 35856: {'lr': 0.0004381720673930134, 'samples': 18358784, 'steps': 35856, 'loss/train': 1.747823715209961} +03/05/2022 07:37:00 - INFO - codeparrot_training - Step 35857: {'lr': 0.00043816857351181503, 'samples': 18359296, 'steps': 35857, 'loss/train': 0.8852327466011047} +03/05/2022 07:37:01 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/05/2022 07:37:05 - INFO - codeparrot_training - Step 35858: {'lr': 0.0004381650795458309, 'samples': 18359808, 'steps': 35858, 'loss/train': 1.6517338752746582} +03/05/2022 07:37:08 - INFO - codeparrot_training - Step 35859: {'lr': 0.0004381615854950625, 'samples': 18360320, 'steps': 35859, 'loss/train': 1.780535101890564} +03/05/2022 07:37:10 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 07:37:13 - INFO - codeparrot_training - Step 35860: {'lr': 0.0004381580913595113, 'samples': 18360832, 'steps': 35860, 'loss/train': 1.717444658279419} +03/05/2022 07:37:16 - INFO - codeparrot_training - Step 35861: {'lr': 0.000438154597139179, 'samples': 18361344, 'steps': 35861, 'loss/train': 1.301609992980957} +03/05/2022 07:37:18 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/05/2022 07:37:22 - INFO - codeparrot_training - Step 35862: {'lr': 0.0004381511028340671, 'samples': 18361856, 'steps': 35862, 'loss/train': 1.6359411478042603} +03/05/2022 07:37:25 - INFO - codeparrot_training - Step 35863: {'lr': 0.0004381476084441773, 'samples': 18362368, 'steps': 35863, 'loss/train': 1.61943519115448} +03/05/2022 07:37:27 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/05/2022 07:37:30 - INFO - codeparrot_training - Step 35864: {'lr': 0.00043814411396951103, 'samples': 18362880, 'steps': 35864, 'loss/train': 2.1041910648345947} +03/05/2022 07:37:34 - INFO - codeparrot_training - Step 35865: {'lr': 0.00043814061941007, 'samples': 18363392, 'steps': 35865, 'loss/train': 2.032467842102051} +03/05/2022 07:37:36 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/05/2022 07:37:39 - INFO - codeparrot_training - Step 35866: {'lr': 0.00043813712476585564, 'samples': 18363904, 'steps': 35866, 'loss/train': 2.0797317028045654} +03/05/2022 07:37:42 - INFO - codeparrot_training - Step 35867: {'lr': 0.00043813363003686963, 'samples': 18364416, 'steps': 35867, 'loss/train': 2.0738637447357178} +03/05/2022 07:37:44 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/05/2022 07:37:47 - INFO - codeparrot_training - Step 35868: {'lr': 0.00043813013522311353, 'samples': 18364928, 'steps': 35868, 'loss/train': 2.203000783920288} +03/05/2022 07:37:51 - INFO - codeparrot_training - Step 35869: {'lr': 0.0004381266403245888, 'samples': 18365440, 'steps': 35869, 'loss/train': 1.9345464706420898} +03/05/2022 07:37:53 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 07:37:56 - INFO - codeparrot_training - Step 35870: {'lr': 0.00043812314534129716, 'samples': 18365952, 'steps': 35870, 'loss/train': 2.2334775924682617} +03/05/2022 07:37:59 - INFO - codeparrot_training - Step 35871: {'lr': 0.0004381196502732402, 'samples': 18366464, 'steps': 35871, 'loss/train': 1.7445776462554932} +03/05/2022 07:38:01 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/05/2022 07:38:04 - INFO - codeparrot_training - Step 35872: {'lr': 0.00043811615512041934, 'samples': 18366976, 'steps': 35872, 'loss/train': 1.575972080230713} +03/05/2022 07:38:07 - INFO - codeparrot_training - Step 35873: {'lr': 0.00043811265988283625, 'samples': 18367488, 'steps': 35873, 'loss/train': 1.6200170516967773} +03/05/2022 07:38:09 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/05/2022 07:38:13 - INFO - codeparrot_training - Step 35874: {'lr': 0.00043810916456049257, 'samples': 18368000, 'steps': 35874, 'loss/train': 1.4898769855499268} +03/05/2022 07:38:16 - INFO - codeparrot_training - Step 35875: {'lr': 0.00043810566915338965, 'samples': 18368512, 'steps': 35875, 'loss/train': 2.4805665016174316} +03/05/2022 07:38:19 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 07:38:21 - INFO - codeparrot_training - Step 35876: {'lr': 0.0004381021736615294, 'samples': 18369024, 'steps': 35876, 'loss/train': 1.5496997833251953} +03/05/2022 07:38:24 - INFO - codeparrot_training - Step 35877: {'lr': 0.0004380986780849131, 'samples': 18369536, 'steps': 35877, 'loss/train': 1.1166820526123047} +03/05/2022 07:38:27 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/05/2022 07:38:30 - INFO - codeparrot_training - Step 35878: {'lr': 0.0004380951824235425, 'samples': 18370048, 'steps': 35878, 'loss/train': 2.17287015914917} +03/05/2022 07:38:33 - INFO - codeparrot_training - Step 35879: {'lr': 0.00043809168667741907, 'samples': 18370560, 'steps': 35879, 'loss/train': 1.0047917366027832} +03/05/2022 07:38:35 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/05/2022 07:38:38 - INFO - codeparrot_training - Step 35880: {'lr': 0.0004380881908465445, 'samples': 18371072, 'steps': 35880, 'loss/train': 1.731549859046936} +03/05/2022 07:38:41 - INFO - codeparrot_training - Step 35881: {'lr': 0.0004380846949309202, 'samples': 18371584, 'steps': 35881, 'loss/train': 1.6601622104644775} +03/05/2022 07:38:44 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/05/2022 07:38:47 - INFO - codeparrot_training - Step 35882: {'lr': 0.00043808119893054787, 'samples': 18372096, 'steps': 35882, 'loss/train': 2.103219985961914} +03/05/2022 07:38:50 - INFO - codeparrot_training - Step 35883: {'lr': 0.0004380777028454291, 'samples': 18372608, 'steps': 35883, 'loss/train': 1.5138561725616455} +03/05/2022 07:38:52 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 07:38:55 - INFO - codeparrot_training - Step 35884: {'lr': 0.0004380742066755654, 'samples': 18373120, 'steps': 35884, 'loss/train': 1.8518624305725098} +03/05/2022 07:38:58 - INFO - codeparrot_training - Step 35885: {'lr': 0.0004380707104209583, 'samples': 18373632, 'steps': 35885, 'loss/train': 2.2512545585632324} +03/05/2022 07:39:01 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/05/2022 07:39:04 - INFO - codeparrot_training - Step 35886: {'lr': 0.0004380672140816095, 'samples': 18374144, 'steps': 35886, 'loss/train': 1.4925892353057861} +03/05/2022 07:39:07 - INFO - codeparrot_training - Step 35887: {'lr': 0.0004380637176575205, 'samples': 18374656, 'steps': 35887, 'loss/train': 3.4840660095214844} +03/05/2022 07:39:10 - INFO - codeparrot_training - Step 35888: {'lr': 0.00043806022114869294, 'samples': 18375168, 'steps': 35888, 'loss/train': 1.579998254776001} +03/05/2022 07:39:12 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 07:39:16 - INFO - codeparrot_training - Step 35889: {'lr': 0.0004380567245551282, 'samples': 18375680, 'steps': 35889, 'loss/train': 2.1771109104156494} +03/05/2022 07:39:19 - INFO - codeparrot_training - Step 35890: {'lr': 0.0004380532278768282, 'samples': 18376192, 'steps': 35890, 'loss/train': 1.5604552030563354} +03/05/2022 07:39:20 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/05/2022 07:39:24 - INFO - codeparrot_training - Step 35891: {'lr': 0.0004380497311137942, 'samples': 18376704, 'steps': 35891, 'loss/train': 1.2612354755401611} +03/05/2022 07:39:27 - INFO - codeparrot_training - Step 35892: {'lr': 0.00043804623426602784, 'samples': 18377216, 'steps': 35892, 'loss/train': 0.9987308382987976} +03/05/2022 07:39:33 - INFO - codeparrot_training - Step 35893: {'lr': 0.00043804273733353085, 'samples': 18377728, 'steps': 35893, 'loss/train': 1.0788992643356323} +03/05/2022 07:39:36 - INFO - codeparrot_training - Step 35894: {'lr': 0.0004380392403163047, 'samples': 18378240, 'steps': 35894, 'loss/train': 1.6755306720733643} +03/05/2022 07:39:36 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/05/2022 07:39:41 - INFO - codeparrot_training - Step 35895: {'lr': 0.00043803574321435093, 'samples': 18378752, 'steps': 35895, 'loss/train': 1.776163101196289} +03/05/2022 07:39:44 - INFO - codeparrot_training - Step 35896: {'lr': 0.00043803224602767115, 'samples': 18379264, 'steps': 35896, 'loss/train': 1.473827600479126} +03/05/2022 07:39:47 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 07:39:50 - INFO - codeparrot_training - Step 35897: {'lr': 0.000438028748756267, 'samples': 18379776, 'steps': 35897, 'loss/train': 1.7761696577072144} +03/05/2022 07:39:53 - INFO - codeparrot_training - Step 35898: {'lr': 0.00043802525140013994, 'samples': 18380288, 'steps': 35898, 'loss/train': 1.7302769422531128} +03/05/2022 07:39:57 - INFO - codeparrot_training - Step 35899: {'lr': 0.00043802175395929156, 'samples': 18380800, 'steps': 35899, 'loss/train': 6.198789596557617} +03/05/2022 07:39:59 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/05/2022 07:40:02 - INFO - codeparrot_training - Step 35900: {'lr': 0.00043801825643372363, 'samples': 18381312, 'steps': 35900, 'loss/train': 2.493941068649292} +03/05/2022 07:40:05 - INFO - codeparrot_training - Step 35901: {'lr': 0.00043801475882343743, 'samples': 18381824, 'steps': 35901, 'loss/train': 0.8849452137947083} +03/05/2022 07:40:07 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/05/2022 07:40:10 - INFO - codeparrot_training - Step 35902: {'lr': 0.0004380112611284347, 'samples': 18382336, 'steps': 35902, 'loss/train': 1.8997044563293457} +03/05/2022 07:40:13 - INFO - codeparrot_training - Step 35903: {'lr': 0.00043800776334871705, 'samples': 18382848, 'steps': 35903, 'loss/train': 0.8492888808250427} +03/05/2022 07:40:16 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/05/2022 07:40:19 - INFO - codeparrot_training - Step 35904: {'lr': 0.000438004265484286, 'samples': 18383360, 'steps': 35904, 'loss/train': 2.1476662158966064} +03/05/2022 07:40:22 - INFO - codeparrot_training - Step 35905: {'lr': 0.0004380007675351431, 'samples': 18383872, 'steps': 35905, 'loss/train': 2.3244035243988037} +03/05/2022 07:40:24 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/05/2022 07:40:27 - INFO - codeparrot_training - Step 35906: {'lr': 0.00043799726950128997, 'samples': 18384384, 'steps': 35906, 'loss/train': 1.8167943954467773} +03/05/2022 07:40:31 - INFO - codeparrot_training - Step 35907: {'lr': 0.0004379937713827282, 'samples': 18384896, 'steps': 35907, 'loss/train': 3.1521358489990234} +03/05/2022 07:40:33 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/05/2022 07:40:36 - INFO - codeparrot_training - Step 35908: {'lr': 0.0004379902731794593, 'samples': 18385408, 'steps': 35908, 'loss/train': 1.706661581993103} +03/05/2022 07:40:39 - INFO - codeparrot_training - Step 35909: {'lr': 0.00043798677489148487, 'samples': 18385920, 'steps': 35909, 'loss/train': 1.45589017868042} +03/05/2022 07:40:41 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 07:40:44 - INFO - codeparrot_training - Step 35910: {'lr': 0.0004379832765188065, 'samples': 18386432, 'steps': 35910, 'loss/train': 0.8694603443145752} +03/05/2022 07:40:47 - INFO - codeparrot_training - Step 35911: {'lr': 0.00043797977806142585, 'samples': 18386944, 'steps': 35911, 'loss/train': 2.3876607418060303} +03/05/2022 07:40:50 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 07:40:53 - INFO - codeparrot_training - Step 35912: {'lr': 0.0004379762795193443, 'samples': 18387456, 'steps': 35912, 'loss/train': 1.6392831802368164} +03/05/2022 07:40:56 - INFO - codeparrot_training - Step 35913: {'lr': 0.0004379727808925636, 'samples': 18387968, 'steps': 35913, 'loss/train': 1.2126848697662354} +03/05/2022 07:41:00 - INFO - codeparrot_training - Step 35914: {'lr': 0.00043796928218108527, 'samples': 18388480, 'steps': 35914, 'loss/train': 2.2581064701080322} +03/05/2022 07:41:01 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 07:41:05 - INFO - codeparrot_training - Step 35915: {'lr': 0.0004379657833849109, 'samples': 18388992, 'steps': 35915, 'loss/train': 0.9591162800788879} +03/05/2022 07:41:08 - INFO - codeparrot_training - Step 35916: {'lr': 0.000437962284504042, 'samples': 18389504, 'steps': 35916, 'loss/train': 0.829038679599762} +03/05/2022 07:41:09 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/05/2022 07:41:13 - INFO - codeparrot_training - Step 35917: {'lr': 0.00043795878553848025, 'samples': 18390016, 'steps': 35917, 'loss/train': 2.1771657466888428} +03/05/2022 07:41:16 - INFO - codeparrot_training - Step 35918: {'lr': 0.0004379552864882271, 'samples': 18390528, 'steps': 35918, 'loss/train': 1.8311961889266968} +03/05/2022 07:41:18 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/05/2022 07:41:22 - INFO - codeparrot_training - Step 35919: {'lr': 0.00043795178735328425, 'samples': 18391040, 'steps': 35919, 'loss/train': 1.9642060995101929} +03/05/2022 07:41:25 - INFO - codeparrot_training - Step 35920: {'lr': 0.0004379482881336532, 'samples': 18391552, 'steps': 35920, 'loss/train': 1.6833932399749756} +03/05/2022 07:41:26 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/05/2022 07:41:30 - INFO - codeparrot_training - Step 35921: {'lr': 0.0004379447888293355, 'samples': 18392064, 'steps': 35921, 'loss/train': 1.6987532377243042} +03/05/2022 07:41:33 - INFO - codeparrot_training - Step 35922: {'lr': 0.0004379412894403328, 'samples': 18392576, 'steps': 35922, 'loss/train': 2.100895643234253} +03/05/2022 07:41:34 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/05/2022 07:41:39 - INFO - codeparrot_training - Step 35923: {'lr': 0.0004379377899666468, 'samples': 18393088, 'steps': 35923, 'loss/train': 1.6641383171081543} +03/05/2022 07:41:42 - INFO - codeparrot_training - Step 35924: {'lr': 0.0004379342904082788, 'samples': 18393600, 'steps': 35924, 'loss/train': 2.1020548343658447} +03/05/2022 07:41:43 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 07:41:47 - INFO - codeparrot_training - Step 35925: {'lr': 0.00043793079076523053, 'samples': 18394112, 'steps': 35925, 'loss/train': 0.6086430549621582} +03/05/2022 07:41:50 - INFO - codeparrot_training - Step 35926: {'lr': 0.0004379272910375035, 'samples': 18394624, 'steps': 35926, 'loss/train': 0.35656875371932983} +03/05/2022 07:41:52 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/05/2022 07:41:56 - INFO - codeparrot_training - Step 35927: {'lr': 0.0004379237912250994, 'samples': 18395136, 'steps': 35927, 'loss/train': 1.3075683116912842} +03/05/2022 07:41:59 - INFO - codeparrot_training - Step 35928: {'lr': 0.0004379202913280197, 'samples': 18395648, 'steps': 35928, 'loss/train': 1.9781527519226074} +03/05/2022 07:42:01 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/05/2022 07:42:04 - INFO - codeparrot_training - Step 35929: {'lr': 0.0004379167913462661, 'samples': 18396160, 'steps': 35929, 'loss/train': 2.9559319019317627} +03/05/2022 07:42:07 - INFO - codeparrot_training - Step 35930: {'lr': 0.00043791329127984004, 'samples': 18396672, 'steps': 35930, 'loss/train': 2.3536975383758545} +03/05/2022 07:42:09 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 07:42:12 - INFO - codeparrot_training - Step 35931: {'lr': 0.0004379097911287431, 'samples': 18397184, 'steps': 35931, 'loss/train': 1.2417542934417725} +03/05/2022 07:42:16 - INFO - codeparrot_training - Step 35932: {'lr': 0.000437906290892977, 'samples': 18397696, 'steps': 35932, 'loss/train': 2.0973689556121826} +03/05/2022 07:42:17 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 07:42:21 - INFO - codeparrot_training - Step 35933: {'lr': 0.00043790279057254314, 'samples': 18398208, 'steps': 35933, 'loss/train': 1.8377655744552612} +03/05/2022 07:42:24 - INFO - codeparrot_training - Step 35934: {'lr': 0.00043789929016744324, 'samples': 18398720, 'steps': 35934, 'loss/train': 1.5965042114257812} +03/05/2022 07:42:26 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 07:42:29 - INFO - codeparrot_training - Step 35935: {'lr': 0.0004378957896776787, 'samples': 18399232, 'steps': 35935, 'loss/train': 2.51243257522583} +03/05/2022 07:42:32 - INFO - codeparrot_training - Step 35936: {'lr': 0.0004378922891032514, 'samples': 18399744, 'steps': 35936, 'loss/train': 1.9373443126678467} +03/05/2022 07:42:34 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/05/2022 07:42:38 - INFO - codeparrot_training - Step 35937: {'lr': 0.0004378887884441626, 'samples': 18400256, 'steps': 35937, 'loss/train': 1.3188179731369019} +03/05/2022 07:42:41 - INFO - codeparrot_training - Step 35938: {'lr': 0.000437885287700414, 'samples': 18400768, 'steps': 35938, 'loss/train': 2.3480968475341797} +03/05/2022 07:42:43 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 07:42:46 - INFO - codeparrot_training - Step 35939: {'lr': 0.0004378817868720073, 'samples': 18401280, 'steps': 35939, 'loss/train': 1.3014940023422241} +03/05/2022 07:42:49 - INFO - codeparrot_training - Step 35940: {'lr': 0.0004378782859589439, 'samples': 18401792, 'steps': 35940, 'loss/train': 0.8093006014823914} +03/05/2022 07:42:51 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 07:42:55 - INFO - codeparrot_training - Step 35941: {'lr': 0.00043787478496122546, 'samples': 18402304, 'steps': 35941, 'loss/train': 1.4561192989349365} +03/05/2022 07:42:58 - INFO - codeparrot_training - Step 35942: {'lr': 0.0004378712838788536, 'samples': 18402816, 'steps': 35942, 'loss/train': 1.4554587602615356} +03/05/2022 07:43:00 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 07:43:03 - INFO - codeparrot_training - Step 35943: {'lr': 0.0004378677827118297, 'samples': 18403328, 'steps': 35943, 'loss/train': 1.6701472997665405} +03/05/2022 07:43:06 - INFO - codeparrot_training - Step 35944: {'lr': 0.0004378642814601556, 'samples': 18403840, 'steps': 35944, 'loss/train': 2.287165641784668} +03/05/2022 07:43:08 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 07:43:11 - INFO - codeparrot_training - Step 35945: {'lr': 0.0004378607801238327, 'samples': 18404352, 'steps': 35945, 'loss/train': 1.367347002029419} +03/05/2022 07:43:15 - INFO - codeparrot_training - Step 35946: {'lr': 0.00043785727870286265, 'samples': 18404864, 'steps': 35946, 'loss/train': 1.82753586769104} +03/05/2022 07:43:16 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/05/2022 07:43:20 - INFO - codeparrot_training - Step 35947: {'lr': 0.00043785377719724697, 'samples': 18405376, 'steps': 35947, 'loss/train': 1.7543127536773682} +03/05/2022 07:43:23 - INFO - codeparrot_training - Step 35948: {'lr': 0.0004378502756069873, 'samples': 18405888, 'steps': 35948, 'loss/train': 2.7939560413360596} +03/05/2022 07:43:25 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 07:43:28 - INFO - codeparrot_training - Step 35949: {'lr': 0.0004378467739320852, 'samples': 18406400, 'steps': 35949, 'loss/train': 2.0785772800445557} +03/05/2022 07:43:31 - INFO - codeparrot_training - Step 35950: {'lr': 0.0004378432721725422, 'samples': 18406912, 'steps': 35950, 'loss/train': 1.8044590950012207} +03/05/2022 07:43:33 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/05/2022 07:43:37 - INFO - codeparrot_training - Step 35951: {'lr': 0.00043783977032836, 'samples': 18407424, 'steps': 35951, 'loss/train': 1.4472506046295166} +03/05/2022 07:43:40 - INFO - codeparrot_training - Step 35952: {'lr': 0.00043783626839954005, 'samples': 18407936, 'steps': 35952, 'loss/train': 1.9294869899749756} +03/05/2022 07:43:42 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 07:43:45 - INFO - codeparrot_training - Step 35953: {'lr': 0.0004378327663860839, 'samples': 18408448, 'steps': 35953, 'loss/train': 2.2978625297546387} +03/05/2022 07:43:48 - INFO - codeparrot_training - Step 35954: {'lr': 0.00043782926428799333, 'samples': 18408960, 'steps': 35954, 'loss/train': 2.6325531005859375} +03/05/2022 07:43:50 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/05/2022 07:43:54 - INFO - codeparrot_training - Step 35955: {'lr': 0.0004378257621052698, 'samples': 18409472, 'steps': 35955, 'loss/train': 1.1020407676696777} +03/05/2022 07:43:57 - INFO - codeparrot_training - Step 35956: {'lr': 0.0004378222598379148, 'samples': 18409984, 'steps': 35956, 'loss/train': 1.9964470863342285} +03/05/2022 07:43:59 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 07:44:02 - INFO - codeparrot_training - Step 35957: {'lr': 0.00043781875748593, 'samples': 18410496, 'steps': 35957, 'loss/train': 1.7800228595733643} +03/05/2022 07:44:05 - INFO - codeparrot_training - Step 35958: {'lr': 0.000437815255049317, 'samples': 18411008, 'steps': 35958, 'loss/train': 1.2472730875015259} +03/05/2022 07:44:07 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/05/2022 07:44:11 - INFO - codeparrot_training - Step 35959: {'lr': 0.0004378117525280773, 'samples': 18411520, 'steps': 35959, 'loss/train': 1.005857229232788} +03/05/2022 07:44:14 - INFO - codeparrot_training - Step 35960: {'lr': 0.00043780824992221257, 'samples': 18412032, 'steps': 35960, 'loss/train': 2.2238831520080566} +03/05/2022 07:44:15 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 07:44:19 - INFO - codeparrot_training - Step 35961: {'lr': 0.00043780474723172433, 'samples': 18412544, 'steps': 35961, 'loss/train': 1.3160045146942139} +03/05/2022 07:44:22 - INFO - codeparrot_training - Step 35962: {'lr': 0.00043780124445661416, 'samples': 18413056, 'steps': 35962, 'loss/train': 1.106994390487671} +03/05/2022 07:44:24 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 07:44:28 - INFO - codeparrot_training - Step 35963: {'lr': 0.00043779774159688364, 'samples': 18413568, 'steps': 35963, 'loss/train': 1.735798716545105} +03/05/2022 07:44:31 - INFO - codeparrot_training - Step 35964: {'lr': 0.00043779423865253434, 'samples': 18414080, 'steps': 35964, 'loss/train': 1.217540979385376} +03/05/2022 07:44:32 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/05/2022 07:44:36 - INFO - codeparrot_training - Step 35965: {'lr': 0.00043779073562356783, 'samples': 18414592, 'steps': 35965, 'loss/train': 2.442396640777588} +03/05/2022 07:44:39 - INFO - codeparrot_training - Step 35966: {'lr': 0.0004377872325099858, 'samples': 18415104, 'steps': 35966, 'loss/train': 1.8784565925598145} +03/05/2022 07:44:41 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/05/2022 07:44:45 - INFO - codeparrot_training - Step 35967: {'lr': 0.00043778372931178974, 'samples': 18415616, 'steps': 35967, 'loss/train': 0.4709174633026123} +03/05/2022 07:44:48 - INFO - codeparrot_training - Step 35968: {'lr': 0.00043778022602898115, 'samples': 18416128, 'steps': 35968, 'loss/train': 1.527003526687622} +03/05/2022 07:44:50 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 07:44:53 - INFO - codeparrot_training - Step 35969: {'lr': 0.0004377767226615617, 'samples': 18416640, 'steps': 35969, 'loss/train': 0.8941778540611267} +03/05/2022 07:44:56 - INFO - codeparrot_training - Step 35970: {'lr': 0.000437773219209533, 'samples': 18417152, 'steps': 35970, 'loss/train': 1.0317169427871704} +03/05/2022 07:44:58 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 07:45:02 - INFO - codeparrot_training - Step 35971: {'lr': 0.00043776971567289656, 'samples': 18417664, 'steps': 35971, 'loss/train': 1.9350649118423462} +03/05/2022 07:45:05 - INFO - codeparrot_training - Step 35972: {'lr': 0.00043776621205165404, 'samples': 18418176, 'steps': 35972, 'loss/train': 1.9036024808883667} +03/05/2022 07:45:07 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/05/2022 07:45:10 - INFO - codeparrot_training - Step 35973: {'lr': 0.0004377627083458069, 'samples': 18418688, 'steps': 35973, 'loss/train': 1.7550768852233887} +03/05/2022 07:45:13 - INFO - codeparrot_training - Step 35974: {'lr': 0.0004377592045553568, 'samples': 18419200, 'steps': 35974, 'loss/train': 1.9465960264205933} +03/05/2022 07:45:15 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 07:45:18 - INFO - codeparrot_training - Step 35975: {'lr': 0.00043775570068030524, 'samples': 18419712, 'steps': 35975, 'loss/train': 2.381316900253296} +03/05/2022 07:45:22 - INFO - codeparrot_training - Step 35976: {'lr': 0.0004377521967206539, 'samples': 18420224, 'steps': 35976, 'loss/train': 1.018899917602539} +03/05/2022 07:45:23 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/05/2022 07:45:27 - INFO - codeparrot_training - Step 35977: {'lr': 0.00043774869267640436, 'samples': 18420736, 'steps': 35977, 'loss/train': 0.7942408323287964} +03/05/2022 07:45:30 - INFO - codeparrot_training - Step 35978: {'lr': 0.0004377451885475581, 'samples': 18421248, 'steps': 35978, 'loss/train': 1.923218011856079} +03/05/2022 07:45:32 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 07:45:36 - INFO - codeparrot_training - Step 35979: {'lr': 0.0004377416843341168, 'samples': 18421760, 'steps': 35979, 'loss/train': 1.3333343267440796} +03/05/2022 07:45:39 - INFO - codeparrot_training - Step 35980: {'lr': 0.00043773818003608203, 'samples': 18422272, 'steps': 35980, 'loss/train': 1.4476085901260376} +03/05/2022 07:45:41 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 07:45:44 - INFO - codeparrot_training - Step 35981: {'lr': 0.00043773467565345523, 'samples': 18422784, 'steps': 35981, 'loss/train': 1.7924728393554688} +03/05/2022 07:45:47 - INFO - codeparrot_training - Step 35982: {'lr': 0.0004377311711862381, 'samples': 18423296, 'steps': 35982, 'loss/train': 1.518284559249878} +03/05/2022 07:45:50 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 07:45:53 - INFO - codeparrot_training - Step 35983: {'lr': 0.0004377276666344322, 'samples': 18423808, 'steps': 35983, 'loss/train': 2.0242831707000732} +03/05/2022 07:45:56 - INFO - codeparrot_training - Step 35984: {'lr': 0.00043772416199803924, 'samples': 18424320, 'steps': 35984, 'loss/train': 1.7766319513320923} +03/05/2022 07:45:59 - INFO - codeparrot_training - Step 35985: {'lr': 0.00043772065727706053, 'samples': 18424832, 'steps': 35985, 'loss/train': 2.3376874923706055} +03/05/2022 07:46:00 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/05/2022 07:46:05 - INFO - codeparrot_training - Step 35986: {'lr': 0.0004377171524714978, 'samples': 18425344, 'steps': 35986, 'loss/train': 1.4031537771224976} +03/05/2022 07:46:08 - INFO - codeparrot_training - Step 35987: {'lr': 0.0004377136475813527, 'samples': 18425856, 'steps': 35987, 'loss/train': 1.650159239768982} +03/05/2022 07:46:08 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/05/2022 07:46:13 - INFO - codeparrot_training - Step 35988: {'lr': 0.0004377101426066266, 'samples': 18426368, 'steps': 35988, 'loss/train': 1.8247898817062378} +03/05/2022 07:46:16 - INFO - codeparrot_training - Step 35989: {'lr': 0.0004377066375473213, 'samples': 18426880, 'steps': 35989, 'loss/train': 1.8686864376068115} +03/05/2022 07:46:17 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/05/2022 07:46:22 - INFO - codeparrot_training - Step 35990: {'lr': 0.00043770313240343826, 'samples': 18427392, 'steps': 35990, 'loss/train': 1.5913472175598145} +03/05/2022 07:46:25 - INFO - codeparrot_training - Step 35991: {'lr': 0.00043769962717497916, 'samples': 18427904, 'steps': 35991, 'loss/train': 0.7668764591217041} +03/05/2022 07:46:25 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/05/2022 07:46:30 - INFO - codeparrot_training - Step 35992: {'lr': 0.0004376961218619454, 'samples': 18428416, 'steps': 35992, 'loss/train': 2.1044797897338867} +03/05/2022 07:46:33 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/05/2022 07:46:35 - INFO - codeparrot_training - Step 35993: {'lr': 0.00043769261646433867, 'samples': 18428928, 'steps': 35993, 'loss/train': 2.0589356422424316} +03/05/2022 07:46:39 - INFO - codeparrot_training - Step 35994: {'lr': 0.0004376891109821606, 'samples': 18429440, 'steps': 35994, 'loss/train': 2.0715579986572266} +03/05/2022 07:46:41 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/05/2022 07:46:44 - INFO - codeparrot_training - Step 35995: {'lr': 0.0004376856054154127, 'samples': 18429952, 'steps': 35995, 'loss/train': 0.7547063827514648} +03/05/2022 07:46:47 - INFO - codeparrot_training - Step 35996: {'lr': 0.00043768209976409645, 'samples': 18430464, 'steps': 35996, 'loss/train': 1.2887649536132812} +03/05/2022 07:46:50 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 07:46:52 - INFO - codeparrot_training - Step 35997: {'lr': 0.0004376785940282137, 'samples': 18430976, 'steps': 35997, 'loss/train': 1.4761664867401123} +03/05/2022 07:46:55 - INFO - codeparrot_training - Step 35998: {'lr': 0.0004376750882077658, 'samples': 18431488, 'steps': 35998, 'loss/train': 1.7713323831558228} +03/05/2022 07:46:58 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 07:47:01 - INFO - codeparrot_training - Step 35999: {'lr': 0.0004376715823027544, 'samples': 18432000, 'steps': 35999, 'loss/train': 1.7673873901367188} +03/05/2022 07:47:04 - INFO - codeparrot_training - Step 36000: {'lr': 0.0004376680763131811, 'samples': 18432512, 'steps': 36000, 'loss/train': 2.2958476543426514} +03/05/2022 07:47:07 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/05/2022 07:47:09 - INFO - codeparrot_training - Step 36001: {'lr': 0.0004376645702390475, 'samples': 18433024, 'steps': 36001, 'loss/train': 2.097511053085327} +03/05/2022 07:47:12 - INFO - codeparrot_training - Step 36002: {'lr': 0.00043766106408035506, 'samples': 18433536, 'steps': 36002, 'loss/train': 1.776561975479126} +03/05/2022 07:47:15 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/05/2022 07:47:18 - INFO - codeparrot_training - Step 36003: {'lr': 0.0004376575578371055, 'samples': 18434048, 'steps': 36003, 'loss/train': 1.5307483673095703} +03/05/2022 07:47:21 - INFO - codeparrot_training - Step 36004: {'lr': 0.0004376540515093003, 'samples': 18434560, 'steps': 36004, 'loss/train': 2.2224555015563965} +03/05/2022 07:47:24 - INFO - codeparrot_training - Step 36005: {'lr': 0.0004376505450969411, 'samples': 18435072, 'steps': 36005, 'loss/train': 1.0934361219406128} +03/05/2022 07:47:24 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/05/2022 07:47:30 - INFO - codeparrot_training - Step 36006: {'lr': 0.0004376470386000294, 'samples': 18435584, 'steps': 36006, 'loss/train': 1.42449152469635} +03/05/2022 07:47:33 - INFO - codeparrot_training - Step 36007: {'lr': 0.0004376435320185669, 'samples': 18436096, 'steps': 36007, 'loss/train': 0.8030275702476501} +03/05/2022 07:47:34 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/05/2022 07:47:38 - INFO - codeparrot_training - Step 36008: {'lr': 0.0004376400253525551, 'samples': 18436608, 'steps': 36008, 'loss/train': 1.8778250217437744} +03/05/2022 07:47:41 - INFO - codeparrot_training - Step 36009: {'lr': 0.0004376365186019956, 'samples': 18437120, 'steps': 36009, 'loss/train': 1.8812497854232788} +03/05/2022 07:47:42 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 07:47:46 - INFO - codeparrot_training - Step 36010: {'lr': 0.00043763301176689, 'samples': 18437632, 'steps': 36010, 'loss/train': 1.4928123950958252} +03/05/2022 07:47:50 - INFO - codeparrot_training - Step 36011: {'lr': 0.0004376295048472399, 'samples': 18438144, 'steps': 36011, 'loss/train': 1.6759164333343506} +03/05/2022 07:47:51 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 07:47:55 - INFO - codeparrot_training - Step 36012: {'lr': 0.0004376259978430468, 'samples': 18438656, 'steps': 36012, 'loss/train': 2.0091207027435303} +03/05/2022 07:47:58 - INFO - codeparrot_training - Step 36013: {'lr': 0.0004376224907543123, 'samples': 18439168, 'steps': 36013, 'loss/train': 1.7162997722625732} +03/05/2022 07:47:59 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 07:48:03 - INFO - codeparrot_training - Step 36014: {'lr': 0.00043761898358103804, 'samples': 18439680, 'steps': 36014, 'loss/train': 1.0046173334121704} +03/05/2022 07:48:06 - INFO - codeparrot_training - Step 36015: {'lr': 0.0004376154763232255, 'samples': 18440192, 'steps': 36015, 'loss/train': 1.843575358390808} +03/05/2022 07:48:08 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/05/2022 07:48:12 - INFO - codeparrot_training - Step 36016: {'lr': 0.0004376119689808764, 'samples': 18440704, 'steps': 36016, 'loss/train': 1.7849065065383911} +03/05/2022 07:48:15 - INFO - codeparrot_training - Step 36017: {'lr': 0.00043760846155399216, 'samples': 18441216, 'steps': 36017, 'loss/train': 2.235692262649536} +03/05/2022 07:48:16 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/05/2022 07:48:20 - INFO - codeparrot_training - Step 36018: {'lr': 0.0004376049540425745, 'samples': 18441728, 'steps': 36018, 'loss/train': 1.2385354042053223} +03/05/2022 07:48:23 - INFO - codeparrot_training - Step 36019: {'lr': 0.0004376014464466249, 'samples': 18442240, 'steps': 36019, 'loss/train': 1.3960278034210205} +03/05/2022 07:48:25 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/05/2022 07:48:29 - INFO - codeparrot_training - Step 36020: {'lr': 0.0004375979387661451, 'samples': 18442752, 'steps': 36020, 'loss/train': 2.5139732360839844} +03/05/2022 07:48:32 - INFO - codeparrot_training - Step 36021: {'lr': 0.0004375944310011364, 'samples': 18443264, 'steps': 36021, 'loss/train': 1.8864893913269043} +03/05/2022 07:48:33 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 07:48:37 - INFO - codeparrot_training - Step 36022: {'lr': 0.00043759092315160064, 'samples': 18443776, 'steps': 36022, 'loss/train': 1.771661639213562} +03/05/2022 07:48:40 - INFO - codeparrot_training - Step 36023: {'lr': 0.00043758741521753925, 'samples': 18444288, 'steps': 36023, 'loss/train': 1.9043145179748535} +03/05/2022 07:48:41 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/05/2022 07:48:46 - INFO - codeparrot_training - Step 36024: {'lr': 0.0004375839071989539, 'samples': 18444800, 'steps': 36024, 'loss/train': 1.9852889776229858} +03/05/2022 07:48:49 - INFO - codeparrot_training - Step 36025: {'lr': 0.00043758039909584613, 'samples': 18445312, 'steps': 36025, 'loss/train': 1.680732011795044} +03/05/2022 07:48:50 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/05/2022 07:48:54 - INFO - codeparrot_training - Step 36026: {'lr': 0.0004375768909082175, 'samples': 18445824, 'steps': 36026, 'loss/train': 2.077925682067871} +03/05/2022 07:48:57 - INFO - codeparrot_training - Step 36027: {'lr': 0.0004375733826360697, 'samples': 18446336, 'steps': 36027, 'loss/train': 1.7050411701202393} +03/05/2022 07:48:59 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 07:49:03 - INFO - codeparrot_training - Step 36028: {'lr': 0.0004375698742794042, 'samples': 18446848, 'steps': 36028, 'loss/train': 0.2357165366411209} +03/05/2022 07:49:06 - INFO - codeparrot_training - Step 36029: {'lr': 0.0004375663658382225, 'samples': 18447360, 'steps': 36029, 'loss/train': 1.7269304990768433} +03/05/2022 07:49:08 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/05/2022 07:49:11 - INFO - codeparrot_training - Step 36030: {'lr': 0.0004375628573125264, 'samples': 18447872, 'steps': 36030, 'loss/train': 1.5712928771972656} +03/05/2022 07:49:14 - INFO - codeparrot_training - Step 36031: {'lr': 0.0004375593487023174, 'samples': 18448384, 'steps': 36031, 'loss/train': 1.0479099750518799} +03/05/2022 07:49:17 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 07:49:20 - INFO - codeparrot_training - Step 36032: {'lr': 0.00043755584000759696, 'samples': 18448896, 'steps': 36032, 'loss/train': 0.9133749008178711} +03/05/2022 07:49:23 - INFO - codeparrot_training - Step 36033: {'lr': 0.0004375523312283668, 'samples': 18449408, 'steps': 36033, 'loss/train': 1.503010869026184} +03/05/2022 07:49:25 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/05/2022 07:49:28 - INFO - codeparrot_training - Step 36034: {'lr': 0.00043754882236462844, 'samples': 18449920, 'steps': 36034, 'loss/train': 2.320054054260254} +03/05/2022 07:49:31 - INFO - codeparrot_training - Step 36035: {'lr': 0.00043754531341638346, 'samples': 18450432, 'steps': 36035, 'loss/train': 2.1610043048858643} +03/05/2022 07:49:34 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 07:49:37 - INFO - codeparrot_training - Step 36036: {'lr': 0.00043754180438363344, 'samples': 18450944, 'steps': 36036, 'loss/train': 2.3736748695373535} +03/05/2022 07:49:40 - INFO - codeparrot_training - Step 36037: {'lr': 0.00043753829526638, 'samples': 18451456, 'steps': 36037, 'loss/train': 2.0576727390289307} +03/05/2022 07:49:42 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 07:49:45 - INFO - codeparrot_training - Step 36038: {'lr': 0.0004375347860646247, 'samples': 18451968, 'steps': 36038, 'loss/train': 0.9850777387619019} +03/05/2022 07:49:48 - INFO - codeparrot_training - Step 36039: {'lr': 0.00043753127677836917, 'samples': 18452480, 'steps': 36039, 'loss/train': 1.829777717590332} +03/05/2022 07:49:51 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/05/2022 07:49:53 - INFO - codeparrot_training - Step 36040: {'lr': 0.0004375277674076149, 'samples': 18452992, 'steps': 36040, 'loss/train': 2.5125606060028076} +03/05/2022 07:49:57 - INFO - codeparrot_training - Step 36041: {'lr': 0.0004375242579523635, 'samples': 18453504, 'steps': 36041, 'loss/train': 0.8594634532928467} +03/05/2022 07:49:59 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/05/2022 07:50:02 - INFO - codeparrot_training - Step 36042: {'lr': 0.0004375207484126166, 'samples': 18454016, 'steps': 36042, 'loss/train': 1.9077296257019043} +03/05/2022 07:50:05 - INFO - codeparrot_training - Step 36043: {'lr': 0.0004375172387883757, 'samples': 18454528, 'steps': 36043, 'loss/train': 1.8142083883285522} +03/05/2022 07:50:07 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 07:50:10 - INFO - codeparrot_training - Step 36044: {'lr': 0.00043751372907964247, 'samples': 18455040, 'steps': 36044, 'loss/train': 1.7211384773254395} +03/05/2022 07:50:13 - INFO - codeparrot_training - Step 36045: {'lr': 0.00043751021928641845, 'samples': 18455552, 'steps': 36045, 'loss/train': 0.13927248120307922} +03/05/2022 07:50:16 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 07:50:19 - INFO - codeparrot_training - Step 36046: {'lr': 0.0004375067094087051, 'samples': 18456064, 'steps': 36046, 'loss/train': 1.8733571767807007} +03/05/2022 07:50:22 - INFO - codeparrot_training - Step 36047: {'lr': 0.0004375031994465042, 'samples': 18456576, 'steps': 36047, 'loss/train': 1.6457723379135132} +03/05/2022 07:50:24 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 07:50:27 - INFO - codeparrot_training - Step 36048: {'lr': 0.00043749968939981734, 'samples': 18457088, 'steps': 36048, 'loss/train': 1.310285210609436} +03/05/2022 07:50:31 - INFO - codeparrot_training - Step 36049: {'lr': 0.0004374961792686459, 'samples': 18457600, 'steps': 36049, 'loss/train': 2.435058832168579} +03/05/2022 07:50:32 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 07:50:36 - INFO - codeparrot_training - Step 36050: {'lr': 0.00043749266905299155, 'samples': 18458112, 'steps': 36050, 'loss/train': 0.5977001786231995} +03/05/2022 07:50:39 - INFO - codeparrot_training - Step 36051: {'lr': 0.000437489158752856, 'samples': 18458624, 'steps': 36051, 'loss/train': 1.969411015510559} +03/05/2022 07:50:42 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/05/2022 07:50:45 - INFO - codeparrot_training - Step 36052: {'lr': 0.00043748564836824065, 'samples': 18459136, 'steps': 36052, 'loss/train': 0.9437122941017151} +03/05/2022 07:50:48 - INFO - codeparrot_training - Step 36053: {'lr': 0.0004374821378991473, 'samples': 18459648, 'steps': 36053, 'loss/train': 2.2931623458862305} +03/05/2022 07:50:50 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 07:50:53 - INFO - codeparrot_training - Step 36054: {'lr': 0.0004374786273455772, 'samples': 18460160, 'steps': 36054, 'loss/train': 1.9026390314102173} +03/05/2022 07:50:56 - INFO - codeparrot_training - Step 36055: {'lr': 0.0004374751167075322, 'samples': 18460672, 'steps': 36055, 'loss/train': 1.6958110332489014} +03/05/2022 07:50:59 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/05/2022 07:51:02 - INFO - codeparrot_training - Step 36056: {'lr': 0.0004374716059850138, 'samples': 18461184, 'steps': 36056, 'loss/train': 1.5859400033950806} +03/05/2022 07:51:05 - INFO - codeparrot_training - Step 36057: {'lr': 0.0004374680951780236, 'samples': 18461696, 'steps': 36057, 'loss/train': 0.7874780893325806} +03/05/2022 07:51:07 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/05/2022 07:51:10 - INFO - codeparrot_training - Step 36058: {'lr': 0.00043746458428656324, 'samples': 18462208, 'steps': 36058, 'loss/train': 2.397481918334961} +03/05/2022 07:51:13 - INFO - codeparrot_training - Step 36059: {'lr': 0.00043746107331063414, 'samples': 18462720, 'steps': 36059, 'loss/train': 1.4412318468093872} +03/05/2022 07:51:16 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/05/2022 07:51:18 - INFO - codeparrot_training - Step 36060: {'lr': 0.000437457562250238, 'samples': 18463232, 'steps': 36060, 'loss/train': 1.8971121311187744} +03/05/2022 07:51:21 - INFO - codeparrot_training - Step 36061: {'lr': 0.0004374540511053763, 'samples': 18463744, 'steps': 36061, 'loss/train': 1.9326227903366089} +03/05/2022 07:51:24 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/05/2022 07:51:27 - INFO - codeparrot_training - Step 36062: {'lr': 0.00043745053987605075, 'samples': 18464256, 'steps': 36062, 'loss/train': 2.0043246746063232} +03/05/2022 07:51:30 - INFO - codeparrot_training - Step 36063: {'lr': 0.00043744702856226295, 'samples': 18464768, 'steps': 36063, 'loss/train': 2.230461597442627} +03/05/2022 07:51:33 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/05/2022 07:51:35 - INFO - codeparrot_training - Step 36064: {'lr': 0.0004374435171640144, 'samples': 18465280, 'steps': 36064, 'loss/train': 2.3074138164520264} +03/05/2022 07:51:38 - INFO - codeparrot_training - Step 36065: {'lr': 0.0004374400056813066, 'samples': 18465792, 'steps': 36065, 'loss/train': 2.333336114883423} +03/05/2022 07:51:42 - INFO - codeparrot_training - Step 36066: {'lr': 0.0004374364941141413, 'samples': 18466304, 'steps': 36066, 'loss/train': 1.0370543003082275} +03/05/2022 07:51:42 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/05/2022 07:51:47 - INFO - codeparrot_training - Step 36067: {'lr': 0.00043743298246251994, 'samples': 18466816, 'steps': 36067, 'loss/train': 2.4303033351898193} +03/05/2022 07:51:50 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/05/2022 07:51:52 - INFO - codeparrot_training - Step 36068: {'lr': 0.00043742947072644424, 'samples': 18467328, 'steps': 36068, 'loss/train': 1.48797607421875} +03/05/2022 07:51:56 - INFO - codeparrot_training - Step 36069: {'lr': 0.0004374259589059157, 'samples': 18467840, 'steps': 36069, 'loss/train': 0.5635892748832703} +03/05/2022 07:51:58 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/05/2022 07:52:01 - INFO - codeparrot_training - Step 36070: {'lr': 0.0004374224470009359, 'samples': 18468352, 'steps': 36070, 'loss/train': 2.4696810245513916} +03/05/2022 07:52:04 - INFO - codeparrot_training - Step 36071: {'lr': 0.00043741893501150644, 'samples': 18468864, 'steps': 36071, 'loss/train': 1.4809744358062744} +03/05/2022 07:52:07 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/05/2022 07:52:09 - INFO - codeparrot_training - Step 36072: {'lr': 0.0004374154229376289, 'samples': 18469376, 'steps': 36072, 'loss/train': 1.4185858964920044} +03/05/2022 07:52:13 - INFO - codeparrot_training - Step 36073: {'lr': 0.00043741191077930486, 'samples': 18469888, 'steps': 36073, 'loss/train': 0.6192315220832825} +03/05/2022 07:52:15 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/05/2022 07:52:18 - INFO - codeparrot_training - Step 36074: {'lr': 0.00043740839853653594, 'samples': 18470400, 'steps': 36074, 'loss/train': 1.89198899269104} +03/05/2022 07:52:21 - INFO - codeparrot_training - Step 36075: {'lr': 0.0004374048862093236, 'samples': 18470912, 'steps': 36075, 'loss/train': 2.2146034240722656} +03/05/2022 07:52:24 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 07:52:26 - INFO - codeparrot_training - Step 36076: {'lr': 0.00043740137379766954, 'samples': 18471424, 'steps': 36076, 'loss/train': 1.3566384315490723} +03/05/2022 07:52:30 - INFO - codeparrot_training - Step 36077: {'lr': 0.0004373978613015753, 'samples': 18471936, 'steps': 36077, 'loss/train': 1.6380304098129272} +03/05/2022 07:52:32 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/05/2022 07:52:35 - INFO - codeparrot_training - Step 36078: {'lr': 0.00043739434872104257, 'samples': 18472448, 'steps': 36078, 'loss/train': 2.2891197204589844} +03/05/2022 07:52:38 - INFO - codeparrot_training - Step 36079: {'lr': 0.00043739083605607275, 'samples': 18472960, 'steps': 36079, 'loss/train': 2.049835443496704} +03/05/2022 07:52:40 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/05/2022 07:52:43 - INFO - codeparrot_training - Step 36080: {'lr': 0.0004373873233066676, 'samples': 18473472, 'steps': 36080, 'loss/train': 1.891786813735962} +03/05/2022 07:52:46 - INFO - codeparrot_training - Step 36081: {'lr': 0.00043738381047282856, 'samples': 18473984, 'steps': 36081, 'loss/train': 1.8513660430908203} +03/05/2022 07:52:49 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 07:52:52 - INFO - codeparrot_training - Step 36082: {'lr': 0.00043738029755455724, 'samples': 18474496, 'steps': 36082, 'loss/train': 1.5867093801498413} +03/05/2022 07:52:55 - INFO - codeparrot_training - Step 36083: {'lr': 0.00043737678455185524, 'samples': 18475008, 'steps': 36083, 'loss/train': 1.4178807735443115} +03/05/2022 07:52:57 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 07:53:00 - INFO - codeparrot_training - Step 36084: {'lr': 0.0004373732714647242, 'samples': 18475520, 'steps': 36084, 'loss/train': 5.296960353851318} +03/05/2022 07:53:03 - INFO - codeparrot_training - Step 36085: {'lr': 0.0004373697582931657, 'samples': 18476032, 'steps': 36085, 'loss/train': 1.0003697872161865} +03/05/2022 07:53:06 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 07:53:08 - INFO - codeparrot_training - Step 36086: {'lr': 0.0004373662450371812, 'samples': 18476544, 'steps': 36086, 'loss/train': 1.9179296493530273} +03/05/2022 07:53:12 - INFO - codeparrot_training - Step 36087: {'lr': 0.0004373627316967723, 'samples': 18477056, 'steps': 36087, 'loss/train': 2.0919015407562256} +03/05/2022 07:53:14 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/05/2022 07:53:17 - INFO - codeparrot_training - Step 36088: {'lr': 0.0004373592182719408, 'samples': 18477568, 'steps': 36088, 'loss/train': 1.6749885082244873} +03/05/2022 07:53:20 - INFO - codeparrot_training - Step 36089: {'lr': 0.00043735570476268804, 'samples': 18478080, 'steps': 36089, 'loss/train': 0.06244316324591637} +03/05/2022 07:53:23 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 07:53:25 - INFO - codeparrot_training - Step 36090: {'lr': 0.0004373521911690157, 'samples': 18478592, 'steps': 36090, 'loss/train': 1.9964382648468018} +03/05/2022 07:53:29 - INFO - codeparrot_training - Step 36091: {'lr': 0.00043734867749092534, 'samples': 18479104, 'steps': 36091, 'loss/train': 2.279146194458008} +03/05/2022 07:53:31 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/05/2022 07:53:34 - INFO - codeparrot_training - Step 36092: {'lr': 0.0004373451637284186, 'samples': 18479616, 'steps': 36092, 'loss/train': 1.8983275890350342} +03/05/2022 07:53:37 - INFO - codeparrot_training - Step 36093: {'lr': 0.0004373416498814969, 'samples': 18480128, 'steps': 36093, 'loss/train': 2.1691272258758545} +03/05/2022 07:53:39 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 07:53:42 - INFO - codeparrot_training - Step 36094: {'lr': 0.0004373381359501621, 'samples': 18480640, 'steps': 36094, 'loss/train': 1.713473916053772} +03/05/2022 07:53:45 - INFO - codeparrot_training - Step 36095: {'lr': 0.00043733462193441553, 'samples': 18481152, 'steps': 36095, 'loss/train': 2.030616521835327} +03/05/2022 07:53:48 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 07:53:51 - INFO - codeparrot_training - Step 36096: {'lr': 0.00043733110783425894, 'samples': 18481664, 'steps': 36096, 'loss/train': 1.8648601770401} +03/05/2022 07:53:54 - INFO - codeparrot_training - Step 36097: {'lr': 0.00043732759364969374, 'samples': 18482176, 'steps': 36097, 'loss/train': 2.78861665725708} +03/05/2022 07:53:56 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/05/2022 07:53:59 - INFO - codeparrot_training - Step 36098: {'lr': 0.0004373240793807217, 'samples': 18482688, 'steps': 36098, 'loss/train': 1.8727775812149048} +03/05/2022 07:54:02 - INFO - codeparrot_training - Step 36099: {'lr': 0.00043732056502734435, 'samples': 18483200, 'steps': 36099, 'loss/train': 1.3359142541885376} +03/05/2022 07:54:04 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/05/2022 07:54:07 - INFO - codeparrot_training - Step 36100: {'lr': 0.0004373170505895632, 'samples': 18483712, 'steps': 36100, 'loss/train': 1.5899838209152222} +03/05/2022 07:54:11 - INFO - codeparrot_training - Step 36101: {'lr': 0.0004373135360673799, 'samples': 18484224, 'steps': 36101, 'loss/train': 1.8244878053665161} +03/05/2022 07:54:12 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 07:54:16 - INFO - codeparrot_training - Step 36102: {'lr': 0.000437310021460796, 'samples': 18484736, 'steps': 36102, 'loss/train': 1.494297981262207} +03/05/2022 07:54:20 - INFO - codeparrot_training - Step 36103: {'lr': 0.000437306506769813, 'samples': 18485248, 'steps': 36103, 'loss/train': 6.3272624015808105} +03/05/2022 07:54:23 - INFO - codeparrot_training - Step 36104: {'lr': 0.0004373029919944327, 'samples': 18485760, 'steps': 36104, 'loss/train': 6.188086032867432} +03/05/2022 07:54:24 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/05/2022 07:54:28 - INFO - codeparrot_training - Step 36105: {'lr': 0.00043729947713465653, 'samples': 18486272, 'steps': 36105, 'loss/train': 1.921324372291565} +03/05/2022 07:54:31 - INFO - codeparrot_training - Step 36106: {'lr': 0.00043729596219048607, 'samples': 18486784, 'steps': 36106, 'loss/train': 1.467881441116333} +03/05/2022 07:54:33 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 07:54:36 - INFO - codeparrot_training - Step 36107: {'lr': 0.000437292447161923, 'samples': 18487296, 'steps': 36107, 'loss/train': 1.6574931144714355} +03/05/2022 07:54:40 - INFO - codeparrot_training - Step 36108: {'lr': 0.0004372889320489688, 'samples': 18487808, 'steps': 36108, 'loss/train': 0.5765967965126038} +03/05/2022 07:54:41 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 07:54:45 - INFO - codeparrot_training - Step 36109: {'lr': 0.00043728541685162503, 'samples': 18488320, 'steps': 36109, 'loss/train': 0.6162421107292175} +03/05/2022 07:54:48 - INFO - codeparrot_training - Step 36110: {'lr': 0.0004372819015698934, 'samples': 18488832, 'steps': 36110, 'loss/train': 0.9898927211761475} +03/05/2022 07:54:49 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/05/2022 07:54:53 - INFO - codeparrot_training - Step 36111: {'lr': 0.0004372783862037755, 'samples': 18489344, 'steps': 36111, 'loss/train': 1.8901020288467407} +03/05/2022 07:54:57 - INFO - codeparrot_training - Step 36112: {'lr': 0.00043727487075327285, 'samples': 18489856, 'steps': 36112, 'loss/train': 0.48207321763038635} +03/05/2022 07:54:58 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 07:55:02 - INFO - codeparrot_training - Step 36113: {'lr': 0.00043727135521838697, 'samples': 18490368, 'steps': 36113, 'loss/train': 2.088308334350586} +03/05/2022 07:55:05 - INFO - codeparrot_training - Step 36114: {'lr': 0.00043726783959911953, 'samples': 18490880, 'steps': 36114, 'loss/train': 2.0448715686798096} +03/05/2022 07:55:06 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/05/2022 07:55:10 - INFO - codeparrot_training - Step 36115: {'lr': 0.00043726432389547205, 'samples': 18491392, 'steps': 36115, 'loss/train': 1.8767093420028687} +03/05/2022 07:55:13 - INFO - codeparrot_training - Step 36116: {'lr': 0.00043726080810744616, 'samples': 18491904, 'steps': 36116, 'loss/train': 0.7940533757209778} +03/05/2022 07:55:15 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/05/2022 07:55:19 - INFO - codeparrot_training - Step 36117: {'lr': 0.0004372572922350435, 'samples': 18492416, 'steps': 36117, 'loss/train': 1.7426447868347168} +03/05/2022 07:55:22 - INFO - codeparrot_training - Step 36118: {'lr': 0.0004372537762782656, 'samples': 18492928, 'steps': 36118, 'loss/train': 1.850325345993042} +03/05/2022 07:55:23 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/05/2022 07:55:28 - INFO - codeparrot_training - Step 36119: {'lr': 0.00043725026023711395, 'samples': 18493440, 'steps': 36119, 'loss/train': 1.9192856550216675} +03/05/2022 07:55:31 - INFO - codeparrot_training - Step 36120: {'lr': 0.0004372467441115903, 'samples': 18493952, 'steps': 36120, 'loss/train': 0.9669541120529175} +03/05/2022 07:55:33 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 07:55:36 - INFO - codeparrot_training - Step 36121: {'lr': 0.00043724322790169613, 'samples': 18494464, 'steps': 36121, 'loss/train': 1.629615068435669} +03/05/2022 07:55:39 - INFO - codeparrot_training - Step 36122: {'lr': 0.00043723971160743305, 'samples': 18494976, 'steps': 36122, 'loss/train': 2.334636926651001} +03/05/2022 07:55:41 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 07:55:44 - INFO - codeparrot_training - Step 36123: {'lr': 0.00043723619522880266, 'samples': 18495488, 'steps': 36123, 'loss/train': 2.4031403064727783} +03/05/2022 07:55:47 - INFO - codeparrot_training - Step 36124: {'lr': 0.0004372326787658065, 'samples': 18496000, 'steps': 36124, 'loss/train': 1.3550822734832764} +03/05/2022 07:55:49 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 07:55:53 - INFO - codeparrot_training - Step 36125: {'lr': 0.00043722916221844617, 'samples': 18496512, 'steps': 36125, 'loss/train': 1.5701971054077148} +03/05/2022 07:55:56 - INFO - codeparrot_training - Step 36126: {'lr': 0.0004372256455867233, 'samples': 18497024, 'steps': 36126, 'loss/train': 1.6868315935134888} +03/05/2022 07:55:58 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/05/2022 07:56:01 - INFO - codeparrot_training - Step 36127: {'lr': 0.0004372221288706394, 'samples': 18497536, 'steps': 36127, 'loss/train': 1.7051806449890137} +03/05/2022 07:56:04 - INFO - codeparrot_training - Step 36128: {'lr': 0.0004372186120701962, 'samples': 18498048, 'steps': 36128, 'loss/train': 1.2342878580093384} +03/05/2022 07:56:06 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 07:56:10 - INFO - codeparrot_training - Step 36129: {'lr': 0.00043721509518539507, 'samples': 18498560, 'steps': 36129, 'loss/train': 1.768221139907837} +03/05/2022 07:56:13 - INFO - codeparrot_training - Step 36130: {'lr': 0.0004372115782162378, 'samples': 18499072, 'steps': 36130, 'loss/train': 2.062828779220581} +03/05/2022 07:56:14 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 07:56:18 - INFO - codeparrot_training - Step 36131: {'lr': 0.00043720806116272584, 'samples': 18499584, 'steps': 36131, 'loss/train': 2.4220993518829346} +03/05/2022 07:56:21 - INFO - codeparrot_training - Step 36132: {'lr': 0.00043720454402486076, 'samples': 18500096, 'steps': 36132, 'loss/train': 1.1807063817977905} +03/05/2022 07:56:23 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 07:56:27 - INFO - codeparrot_training - Step 36133: {'lr': 0.00043720102680264427, 'samples': 18500608, 'steps': 36133, 'loss/train': 2.1760871410369873} +03/05/2022 07:56:30 - INFO - codeparrot_training - Step 36134: {'lr': 0.0004371975094960778, 'samples': 18501120, 'steps': 36134, 'loss/train': 1.9262043237686157} +03/05/2022 07:56:31 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 07:56:35 - INFO - codeparrot_training - Step 36135: {'lr': 0.0004371939921051632, 'samples': 18501632, 'steps': 36135, 'loss/train': 0.9442529678344727} +03/05/2022 07:56:38 - INFO - codeparrot_training - Step 36136: {'lr': 0.00043719047462990174, 'samples': 18502144, 'steps': 36136, 'loss/train': 0.7386095523834229} +03/05/2022 07:56:40 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/05/2022 07:56:44 - INFO - codeparrot_training - Step 36137: {'lr': 0.0004371869570702952, 'samples': 18502656, 'steps': 36137, 'loss/train': 2.030130624771118} +03/05/2022 07:56:47 - INFO - codeparrot_training - Step 36138: {'lr': 0.0004371834394263451, 'samples': 18503168, 'steps': 36138, 'loss/train': 1.6054258346557617} +03/05/2022 07:56:49 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 07:56:52 - INFO - codeparrot_training - Step 36139: {'lr': 0.000437179921698053, 'samples': 18503680, 'steps': 36139, 'loss/train': 2.243959665298462} +03/05/2022 07:56:55 - INFO - codeparrot_training - Step 36140: {'lr': 0.00043717640388542045, 'samples': 18504192, 'steps': 36140, 'loss/train': 2.0341453552246094} +03/05/2022 07:56:57 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 07:57:01 - INFO - codeparrot_training - Step 36141: {'lr': 0.00043717288598844916, 'samples': 18504704, 'steps': 36141, 'loss/train': 1.5884000062942505} +03/05/2022 07:57:04 - INFO - codeparrot_training - Step 36142: {'lr': 0.0004371693680071407, 'samples': 18505216, 'steps': 36142, 'loss/train': 1.0458205938339233} +03/05/2022 07:57:05 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 07:57:09 - INFO - codeparrot_training - Step 36143: {'lr': 0.00043716584994149657, 'samples': 18505728, 'steps': 36143, 'loss/train': 1.2511316537857056} +03/05/2022 07:57:12 - INFO - codeparrot_training - Step 36144: {'lr': 0.0004371623317915184, 'samples': 18506240, 'steps': 36144, 'loss/train': 2.0580060482025146} +03/05/2022 07:57:14 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 07:57:17 - INFO - codeparrot_training - Step 36145: {'lr': 0.00043715881355720776, 'samples': 18506752, 'steps': 36145, 'loss/train': 1.7831703424453735} +03/05/2022 07:57:21 - INFO - codeparrot_training - Step 36146: {'lr': 0.0004371552952385663, 'samples': 18507264, 'steps': 36146, 'loss/train': 0.20688287913799286} +03/05/2022 07:57:22 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 07:57:26 - INFO - codeparrot_training - Step 36147: {'lr': 0.00043715177683559546, 'samples': 18507776, 'steps': 36147, 'loss/train': 1.8477839231491089} +03/05/2022 07:57:29 - INFO - codeparrot_training - Step 36148: {'lr': 0.000437148258348297, 'samples': 18508288, 'steps': 36148, 'loss/train': 1.1993651390075684} +03/05/2022 07:57:30 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/05/2022 07:57:34 - INFO - codeparrot_training - Step 36149: {'lr': 0.0004371447397766724, 'samples': 18508800, 'steps': 36149, 'loss/train': 1.7000675201416016} +03/05/2022 07:57:37 - INFO - codeparrot_training - Step 36150: {'lr': 0.0004371412211207233, 'samples': 18509312, 'steps': 36150, 'loss/train': 1.796218752861023} +03/05/2022 07:57:39 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 07:57:43 - INFO - codeparrot_training - Step 36151: {'lr': 0.0004371377023804512, 'samples': 18509824, 'steps': 36151, 'loss/train': 2.0572822093963623} +03/05/2022 07:57:46 - INFO - codeparrot_training - Step 36152: {'lr': 0.0004371341835558578, 'samples': 18510336, 'steps': 36152, 'loss/train': 2.4493465423583984} +03/05/2022 07:57:47 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 07:57:51 - INFO - codeparrot_training - Step 36153: {'lr': 0.0004371306646469445, 'samples': 18510848, 'steps': 36153, 'loss/train': 2.734365463256836} +03/05/2022 07:57:54 - INFO - codeparrot_training - Step 36154: {'lr': 0.00043712714565371315, 'samples': 18511360, 'steps': 36154, 'loss/train': 1.4431241750717163} +03/05/2022 07:57:56 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 07:58:00 - INFO - codeparrot_training - Step 36155: {'lr': 0.0004371236265761651, 'samples': 18511872, 'steps': 36155, 'loss/train': 1.902875304222107} +03/05/2022 07:58:03 - INFO - codeparrot_training - Step 36156: {'lr': 0.0004371201074143021, 'samples': 18512384, 'steps': 36156, 'loss/train': 1.6863436698913574} +03/05/2022 07:58:04 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/05/2022 07:58:08 - INFO - codeparrot_training - Step 36157: {'lr': 0.0004371165881681256, 'samples': 18512896, 'steps': 36157, 'loss/train': 2.6713547706604004} +03/05/2022 07:58:11 - INFO - codeparrot_training - Step 36158: {'lr': 0.0004371130688376373, 'samples': 18513408, 'steps': 36158, 'loss/train': 2.0085086822509766} +03/05/2022 07:58:14 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 07:58:17 - INFO - codeparrot_training - Step 36159: {'lr': 0.00043710954942283875, 'samples': 18513920, 'steps': 36159, 'loss/train': 1.138684630393982} +03/05/2022 07:58:20 - INFO - codeparrot_training - Step 36160: {'lr': 0.0004371060299237315, 'samples': 18514432, 'steps': 36160, 'loss/train': 2.2627556324005127} +03/05/2022 07:58:22 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/05/2022 07:58:25 - INFO - codeparrot_training - Step 36161: {'lr': 0.00043710251034031713, 'samples': 18514944, 'steps': 36161, 'loss/train': 2.3857369422912598} +03/05/2022 07:58:28 - INFO - codeparrot_training - Step 36162: {'lr': 0.0004370989906725973, 'samples': 18515456, 'steps': 36162, 'loss/train': 1.485427737236023} +03/05/2022 07:58:31 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 07:58:34 - INFO - codeparrot_training - Step 36163: {'lr': 0.00043709547092057356, 'samples': 18515968, 'steps': 36163, 'loss/train': 0.8569915294647217} +03/05/2022 07:58:37 - INFO - codeparrot_training - Step 36164: {'lr': 0.00043709195108424746, 'samples': 18516480, 'steps': 36164, 'loss/train': 1.4065744876861572} +03/05/2022 07:58:40 - INFO - codeparrot_training - Step 36165: {'lr': 0.0004370884311636206, 'samples': 18516992, 'steps': 36165, 'loss/train': 1.8016481399536133} +03/05/2022 07:58:40 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 07:58:45 - INFO - codeparrot_training - Step 36166: {'lr': 0.0004370849111586946, 'samples': 18517504, 'steps': 36166, 'loss/train': 1.5769487619400024} +03/05/2022 07:58:48 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/05/2022 07:58:51 - INFO - codeparrot_training - Step 36167: {'lr': 0.000437081391069471, 'samples': 18518016, 'steps': 36167, 'loss/train': 2.027282238006592} +03/05/2022 07:58:54 - INFO - codeparrot_training - Step 36168: {'lr': 0.0004370778708959514, 'samples': 18518528, 'steps': 36168, 'loss/train': 1.6011186838150024} +03/05/2022 07:58:56 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 07:58:59 - INFO - codeparrot_training - Step 36169: {'lr': 0.00043707435063813747, 'samples': 18519040, 'steps': 36169, 'loss/train': 1.950432538986206} +03/05/2022 07:59:02 - INFO - codeparrot_training - Step 36170: {'lr': 0.0004370708302960307, 'samples': 18519552, 'steps': 36170, 'loss/train': 1.6154985427856445} +03/05/2022 07:59:06 - INFO - codeparrot_training - Step 36171: {'lr': 0.00043706730986963274, 'samples': 18520064, 'steps': 36171, 'loss/train': 2.0862300395965576} +03/05/2022 07:59:06 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/05/2022 07:59:11 - INFO - codeparrot_training - Step 36172: {'lr': 0.0004370637893589451, 'samples': 18520576, 'steps': 36172, 'loss/train': 0.3163149654865265} +03/05/2022 07:59:14 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 07:59:16 - INFO - codeparrot_training - Step 36173: {'lr': 0.0004370602687639693, 'samples': 18521088, 'steps': 36173, 'loss/train': 1.359409213066101} +03/05/2022 07:59:19 - INFO - codeparrot_training - Step 36174: {'lr': 0.00043705674808470715, 'samples': 18521600, 'steps': 36174, 'loss/train': 1.6837157011032104} +03/05/2022 07:59:22 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/05/2022 07:59:25 - INFO - codeparrot_training - Step 36175: {'lr': 0.00043705322732116007, 'samples': 18522112, 'steps': 36175, 'loss/train': 1.4574109315872192} +03/05/2022 07:59:28 - INFO - codeparrot_training - Step 36176: {'lr': 0.00043704970647332977, 'samples': 18522624, 'steps': 36176, 'loss/train': 1.7398953437805176} +03/05/2022 07:59:31 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/05/2022 07:59:33 - INFO - codeparrot_training - Step 36177: {'lr': 0.00043704618554121766, 'samples': 18523136, 'steps': 36177, 'loss/train': 1.567387342453003} +03/05/2022 07:59:36 - INFO - codeparrot_training - Step 36178: {'lr': 0.0004370426645248254, 'samples': 18523648, 'steps': 36178, 'loss/train': 2.372109889984131} +03/05/2022 07:59:40 - INFO - codeparrot_training - Step 36179: {'lr': 0.00043703914342415473, 'samples': 18524160, 'steps': 36179, 'loss/train': 1.7951995134353638} +03/05/2022 07:59:40 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 07:59:45 - INFO - codeparrot_training - Step 36180: {'lr': 0.000437035622239207, 'samples': 18524672, 'steps': 36180, 'loss/train': 1.779437780380249} +03/05/2022 07:59:48 - INFO - codeparrot_training - Step 36181: {'lr': 0.00043703210096998396, 'samples': 18525184, 'steps': 36181, 'loss/train': 2.1929433345794678} +03/05/2022 07:59:48 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 07:59:53 - INFO - codeparrot_training - Step 36182: {'lr': 0.00043702857961648713, 'samples': 18525696, 'steps': 36182, 'loss/train': 1.0545645952224731} +03/05/2022 07:59:57 - INFO - codeparrot_training - Step 36183: {'lr': 0.0004370250581787181, 'samples': 18526208, 'steps': 36183, 'loss/train': 1.8496661186218262} +03/05/2022 07:59:57 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/05/2022 08:00:02 - INFO - codeparrot_training - Step 36184: {'lr': 0.00043702153665667846, 'samples': 18526720, 'steps': 36184, 'loss/train': 0.7489533424377441} +03/05/2022 08:00:05 - INFO - codeparrot_training - Step 36185: {'lr': 0.0004370180150503698, 'samples': 18527232, 'steps': 36185, 'loss/train': 1.9026918411254883} +03/05/2022 08:00:05 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 08:00:10 - INFO - codeparrot_training - Step 36186: {'lr': 0.0004370144933597938, 'samples': 18527744, 'steps': 36186, 'loss/train': 1.7665891647338867} +03/05/2022 08:00:13 - INFO - codeparrot_training - Step 36187: {'lr': 0.00043701097158495186, 'samples': 18528256, 'steps': 36187, 'loss/train': 1.2755967378616333} +03/05/2022 08:00:14 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/05/2022 08:00:19 - INFO - codeparrot_training - Step 36188: {'lr': 0.0004370074497258456, 'samples': 18528768, 'steps': 36188, 'loss/train': 1.1353886127471924} +03/05/2022 08:00:22 - INFO - codeparrot_training - Step 36189: {'lr': 0.00043700392778247676, 'samples': 18529280, 'steps': 36189, 'loss/train': 1.8752729892730713} +03/05/2022 08:00:23 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 08:00:27 - INFO - codeparrot_training - Step 36190: {'lr': 0.0004370004057548468, 'samples': 18529792, 'steps': 36190, 'loss/train': 2.0424540042877197} +03/05/2022 08:00:30 - INFO - codeparrot_training - Step 36191: {'lr': 0.0004369968836429574, 'samples': 18530304, 'steps': 36191, 'loss/train': 2.411482810974121} +03/05/2022 08:00:31 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 08:00:36 - INFO - codeparrot_training - Step 36192: {'lr': 0.0004369933614468101, 'samples': 18530816, 'steps': 36192, 'loss/train': 1.0778470039367676} +03/05/2022 08:00:39 - INFO - codeparrot_training - Step 36193: {'lr': 0.0004369898391664064, 'samples': 18531328, 'steps': 36193, 'loss/train': 1.1295086145401} +03/05/2022 08:00:39 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 08:00:44 - INFO - codeparrot_training - Step 36194: {'lr': 0.000436986316801748, 'samples': 18531840, 'steps': 36194, 'loss/train': 2.2310383319854736} +03/05/2022 08:00:47 - INFO - codeparrot_training - Step 36195: {'lr': 0.00043698279435283637, 'samples': 18532352, 'steps': 36195, 'loss/train': 2.482656240463257} +03/05/2022 08:00:47 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/05/2022 08:00:53 - INFO - codeparrot_training - Step 36196: {'lr': 0.0004369792718196733, 'samples': 18532864, 'steps': 36196, 'loss/train': 2.0867481231689453} +03/05/2022 08:00:56 - INFO - codeparrot_training - Step 36197: {'lr': 0.0004369757492022602, 'samples': 18533376, 'steps': 36197, 'loss/train': 3.106039047241211} +03/05/2022 08:00:56 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/05/2022 08:01:01 - INFO - codeparrot_training - Step 36198: {'lr': 0.00043697222650059876, 'samples': 18533888, 'steps': 36198, 'loss/train': 0.714049756526947} +03/05/2022 08:01:04 - INFO - codeparrot_training - Step 36199: {'lr': 0.00043696870371469045, 'samples': 18534400, 'steps': 36199, 'loss/train': 1.5154335498809814} +03/05/2022 08:01:04 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/05/2022 08:01:10 - INFO - codeparrot_training - Step 36200: {'lr': 0.000436965180844537, 'samples': 18534912, 'steps': 36200, 'loss/train': 1.8014168739318848} +03/05/2022 08:01:13 - INFO - codeparrot_training - Step 36201: {'lr': 0.00043696165789013986, 'samples': 18535424, 'steps': 36201, 'loss/train': 1.4415826797485352} +03/05/2022 08:01:13 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/05/2022 08:01:18 - INFO - codeparrot_training - Step 36202: {'lr': 0.0004369581348515007, 'samples': 18535936, 'steps': 36202, 'loss/train': 1.9209561347961426} +03/05/2022 08:01:21 - INFO - codeparrot_training - Step 36203: {'lr': 0.00043695461172862113, 'samples': 18536448, 'steps': 36203, 'loss/train': 1.7839361429214478} +03/05/2022 08:01:21 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/05/2022 08:01:27 - INFO - codeparrot_training - Step 36204: {'lr': 0.0004369510885215026, 'samples': 18536960, 'steps': 36204, 'loss/train': 0.8831709623336792} +03/05/2022 08:01:29 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 08:01:32 - INFO - codeparrot_training - Step 36205: {'lr': 0.0004369475652301469, 'samples': 18537472, 'steps': 36205, 'loss/train': 0.9231879115104675} +03/05/2022 08:01:35 - INFO - codeparrot_training - Step 36206: {'lr': 0.0004369440418545555, 'samples': 18537984, 'steps': 36206, 'loss/train': 1.405003309249878} +03/05/2022 08:01:38 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/05/2022 08:01:40 - INFO - codeparrot_training - Step 36207: {'lr': 0.00043694051839472995, 'samples': 18538496, 'steps': 36207, 'loss/train': 1.5588219165802002} +03/05/2022 08:01:43 - INFO - codeparrot_training - Step 36208: {'lr': 0.00043693699485067186, 'samples': 18539008, 'steps': 36208, 'loss/train': 0.14335809648036957} +03/05/2022 08:01:46 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/05/2022 08:01:49 - INFO - codeparrot_training - Step 36209: {'lr': 0.0004369334712223829, 'samples': 18539520, 'steps': 36209, 'loss/train': 1.48665452003479} +03/05/2022 08:01:52 - INFO - codeparrot_training - Step 36210: {'lr': 0.0004369299475098646, 'samples': 18540032, 'steps': 36210, 'loss/train': 1.774695634841919} +03/05/2022 08:01:54 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/05/2022 08:01:57 - INFO - codeparrot_training - Step 36211: {'lr': 0.00043692642371311854, 'samples': 18540544, 'steps': 36211, 'loss/train': 1.3025870323181152} +03/05/2022 08:02:00 - INFO - codeparrot_training - Step 36212: {'lr': 0.00043692289983214626, 'samples': 18541056, 'steps': 36212, 'loss/train': 0.9386810064315796} +03/05/2022 08:02:02 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 08:02:05 - INFO - codeparrot_training - Step 36213: {'lr': 0.0004369193758669495, 'samples': 18541568, 'steps': 36213, 'loss/train': 2.5086231231689453} +03/05/2022 08:02:09 - INFO - codeparrot_training - Step 36214: {'lr': 0.0004369158518175297, 'samples': 18542080, 'steps': 36214, 'loss/train': 1.4386712312698364} +03/05/2022 08:02:14 - INFO - codeparrot_training - Step 36215: {'lr': 0.00043691232768388856, 'samples': 18542592, 'steps': 36215, 'loss/train': 0.8185907006263733} +03/05/2022 08:02:17 - INFO - codeparrot_training - Step 36216: {'lr': 0.00043690880346602755, 'samples': 18543104, 'steps': 36216, 'loss/train': 2.100632667541504} +03/05/2022 08:02:19 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/05/2022 08:02:23 - INFO - codeparrot_training - Step 36217: {'lr': 0.0004369052791639483, 'samples': 18543616, 'steps': 36217, 'loss/train': 2.7359654903411865} +03/05/2022 08:02:26 - INFO - codeparrot_training - Step 36218: {'lr': 0.0004369017547776525, 'samples': 18544128, 'steps': 36218, 'loss/train': 2.3105406761169434} +03/05/2022 08:02:29 - INFO - codeparrot_training - Step 36219: {'lr': 0.0004368982303071416, 'samples': 18544640, 'steps': 36219, 'loss/train': 0.2777814269065857} +03/05/2022 08:02:31 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/05/2022 08:02:35 - INFO - codeparrot_training - Step 36220: {'lr': 0.0004368947057524173, 'samples': 18545152, 'steps': 36220, 'loss/train': 0.5430562496185303} +03/05/2022 08:02:38 - INFO - codeparrot_training - Step 36221: {'lr': 0.00043689118111348105, 'samples': 18545664, 'steps': 36221, 'loss/train': 1.5130711793899536} +03/05/2022 08:02:39 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 08:02:43 - INFO - codeparrot_training - Step 36222: {'lr': 0.00043688765639033456, 'samples': 18546176, 'steps': 36222, 'loss/train': 1.687487006187439} +03/05/2022 08:02:46 - INFO - codeparrot_training - Step 36223: {'lr': 0.00043688413158297934, 'samples': 18546688, 'steps': 36223, 'loss/train': 1.8386186361312866} +03/05/2022 08:02:48 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/05/2022 08:02:51 - INFO - codeparrot_training - Step 36224: {'lr': 0.00043688060669141705, 'samples': 18547200, 'steps': 36224, 'loss/train': 0.5818009376525879} +03/05/2022 08:02:54 - INFO - codeparrot_training - Step 36225: {'lr': 0.00043687708171564923, 'samples': 18547712, 'steps': 36225, 'loss/train': 0.5071030259132385} +03/05/2022 08:02:56 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 08:03:00 - INFO - codeparrot_training - Step 36226: {'lr': 0.00043687355665567745, 'samples': 18548224, 'steps': 36226, 'loss/train': 2.066742181777954} +03/05/2022 08:03:03 - INFO - codeparrot_training - Step 36227: {'lr': 0.0004368700315115034, 'samples': 18548736, 'steps': 36227, 'loss/train': 2.1796653270721436} +03/05/2022 08:03:05 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 08:03:08 - INFO - codeparrot_training - Step 36228: {'lr': 0.00043686650628312854, 'samples': 18549248, 'steps': 36228, 'loss/train': 1.6563241481781006} +03/05/2022 08:03:11 - INFO - codeparrot_training - Step 36229: {'lr': 0.00043686298097055456, 'samples': 18549760, 'steps': 36229, 'loss/train': 1.921481728553772} +03/05/2022 08:03:13 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 08:03:17 - INFO - codeparrot_training - Step 36230: {'lr': 0.0004368594555737829, 'samples': 18550272, 'steps': 36230, 'loss/train': 1.4369345903396606} +03/05/2022 08:03:20 - INFO - codeparrot_training - Step 36231: {'lr': 0.0004368559300928153, 'samples': 18550784, 'steps': 36231, 'loss/train': 2.365375280380249} +03/05/2022 08:03:22 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 08:03:25 - INFO - codeparrot_training - Step 36232: {'lr': 0.0004368524045276534, 'samples': 18551296, 'steps': 36232, 'loss/train': 1.371537446975708} +03/05/2022 08:03:28 - INFO - codeparrot_training - Step 36233: {'lr': 0.00043684887887829863, 'samples': 18551808, 'steps': 36233, 'loss/train': 1.244019627571106} +03/05/2022 08:03:30 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 08:03:34 - INFO - codeparrot_training - Step 36234: {'lr': 0.0004368453531447526, 'samples': 18552320, 'steps': 36234, 'loss/train': 1.665464997291565} +03/05/2022 08:03:37 - INFO - codeparrot_training - Step 36235: {'lr': 0.00043684182732701694, 'samples': 18552832, 'steps': 36235, 'loss/train': 1.2097283601760864} +03/05/2022 08:03:39 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/05/2022 08:03:42 - INFO - codeparrot_training - Step 36236: {'lr': 0.00043683830142509327, 'samples': 18553344, 'steps': 36236, 'loss/train': 1.4860055446624756} +03/05/2022 08:03:45 - INFO - codeparrot_training - Step 36237: {'lr': 0.00043683477543898314, 'samples': 18553856, 'steps': 36237, 'loss/train': 1.096606731414795} +03/05/2022 08:03:47 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 08:03:50 - INFO - codeparrot_training - Step 36238: {'lr': 0.0004368312493686881, 'samples': 18554368, 'steps': 36238, 'loss/train': 1.7119722366333008} +03/05/2022 08:03:54 - INFO - codeparrot_training - Step 36239: {'lr': 0.0004368277232142098, 'samples': 18554880, 'steps': 36239, 'loss/train': 1.7283732891082764} +03/05/2022 08:03:55 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/05/2022 08:03:59 - INFO - codeparrot_training - Step 36240: {'lr': 0.00043682419697554985, 'samples': 18555392, 'steps': 36240, 'loss/train': 2.4775230884552} +03/05/2022 08:04:02 - INFO - codeparrot_training - Step 36241: {'lr': 0.0004368206706527098, 'samples': 18555904, 'steps': 36241, 'loss/train': 1.2291070222854614} +03/05/2022 08:04:04 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/05/2022 08:04:07 - INFO - codeparrot_training - Step 36242: {'lr': 0.00043681714424569117, 'samples': 18556416, 'steps': 36242, 'loss/train': 1.6486002206802368} +03/05/2022 08:04:10 - INFO - codeparrot_training - Step 36243: {'lr': 0.0004368136177544957, 'samples': 18556928, 'steps': 36243, 'loss/train': 2.1133370399475098} +03/05/2022 08:04:12 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/05/2022 08:04:16 - INFO - codeparrot_training - Step 36244: {'lr': 0.00043681009117912484, 'samples': 18557440, 'steps': 36244, 'loss/train': 2.095912218093872} +03/05/2022 08:04:19 - INFO - codeparrot_training - Step 36245: {'lr': 0.0004368065645195803, 'samples': 18557952, 'steps': 36245, 'loss/train': 0.3720453083515167} +03/05/2022 08:04:21 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/05/2022 08:04:24 - INFO - codeparrot_training - Step 36246: {'lr': 0.0004368030377758636, 'samples': 18558464, 'steps': 36246, 'loss/train': 1.4571070671081543} +03/05/2022 08:04:27 - INFO - codeparrot_training - Step 36247: {'lr': 0.0004367995109479763, 'samples': 18558976, 'steps': 36247, 'loss/train': 1.5462771654129028} +03/05/2022 08:04:29 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/05/2022 08:04:33 - INFO - codeparrot_training - Step 36248: {'lr': 0.00043679598403592, 'samples': 18559488, 'steps': 36248, 'loss/train': 1.2162226438522339} +03/05/2022 08:04:36 - INFO - codeparrot_training - Step 36249: {'lr': 0.00043679245703969627, 'samples': 18560000, 'steps': 36249, 'loss/train': 1.5518146753311157} +03/05/2022 08:04:38 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/05/2022 08:04:41 - INFO - codeparrot_training - Step 36250: {'lr': 0.00043678892995930685, 'samples': 18560512, 'steps': 36250, 'loss/train': 1.7710063457489014} +03/05/2022 08:04:44 - INFO - codeparrot_training - Step 36251: {'lr': 0.00043678540279475314, 'samples': 18561024, 'steps': 36251, 'loss/train': 2.0098659992218018} +03/05/2022 08:04:47 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 08:04:50 - INFO - codeparrot_training - Step 36252: {'lr': 0.0004367818755460369, 'samples': 18561536, 'steps': 36252, 'loss/train': 2.060177803039551} +03/05/2022 08:04:53 - INFO - codeparrot_training - Step 36253: {'lr': 0.00043677834821315956, 'samples': 18562048, 'steps': 36253, 'loss/train': 2.0443012714385986} +03/05/2022 08:04:55 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/05/2022 08:04:58 - INFO - codeparrot_training - Step 36254: {'lr': 0.00043677482079612276, 'samples': 18562560, 'steps': 36254, 'loss/train': 1.756696343421936} +03/05/2022 08:05:01 - INFO - codeparrot_training - Step 36255: {'lr': 0.00043677129329492814, 'samples': 18563072, 'steps': 36255, 'loss/train': 1.474064588546753} +03/05/2022 08:05:03 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/05/2022 08:05:06 - INFO - codeparrot_training - Step 36256: {'lr': 0.00043676776570957725, 'samples': 18563584, 'steps': 36256, 'loss/train': 1.7547359466552734} +03/05/2022 08:05:10 - INFO - codeparrot_training - Step 36257: {'lr': 0.0004367642380400717, 'samples': 18564096, 'steps': 36257, 'loss/train': 2.2343010902404785} +03/05/2022 08:05:12 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 08:05:15 - INFO - codeparrot_training - Step 36258: {'lr': 0.0004367607102864131, 'samples': 18564608, 'steps': 36258, 'loss/train': 1.624328851699829} +03/05/2022 08:05:18 - INFO - codeparrot_training - Step 36259: {'lr': 0.00043675718244860296, 'samples': 18565120, 'steps': 36259, 'loss/train': 1.786456823348999} +03/05/2022 08:05:20 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/05/2022 08:05:23 - INFO - codeparrot_training - Step 36260: {'lr': 0.00043675365452664286, 'samples': 18565632, 'steps': 36260, 'loss/train': 1.4165698289871216} +03/05/2022 08:05:26 - INFO - codeparrot_training - Step 36261: {'lr': 0.0004367501265205345, 'samples': 18566144, 'steps': 36261, 'loss/train': 2.088470935821533} +03/05/2022 08:05:28 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/05/2022 08:05:32 - INFO - codeparrot_training - Step 36262: {'lr': 0.0004367465984302794, 'samples': 18566656, 'steps': 36262, 'loss/train': 1.8781400918960571} +03/05/2022 08:05:35 - INFO - codeparrot_training - Step 36263: {'lr': 0.0004367430702558792, 'samples': 18567168, 'steps': 36263, 'loss/train': 1.334175705909729} +03/05/2022 08:05:37 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 08:05:40 - INFO - codeparrot_training - Step 36264: {'lr': 0.0004367395419973355, 'samples': 18567680, 'steps': 36264, 'loss/train': 1.8411093950271606} +03/05/2022 08:05:43 - INFO - codeparrot_training - Step 36265: {'lr': 0.00043673601365464975, 'samples': 18568192, 'steps': 36265, 'loss/train': 1.7900177240371704} +03/05/2022 08:05:45 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 08:05:49 - INFO - codeparrot_training - Step 36266: {'lr': 0.00043673248522782364, 'samples': 18568704, 'steps': 36266, 'loss/train': 1.3390437364578247} +03/05/2022 08:05:52 - INFO - codeparrot_training - Step 36267: {'lr': 0.0004367289567168588, 'samples': 18569216, 'steps': 36267, 'loss/train': 2.2274889945983887} +03/05/2022 08:05:53 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/05/2022 08:05:57 - INFO - codeparrot_training - Step 36268: {'lr': 0.00043672542812175675, 'samples': 18569728, 'steps': 36268, 'loss/train': 1.308266520500183} +03/05/2022 08:06:00 - INFO - codeparrot_training - Step 36269: {'lr': 0.00043672189944251905, 'samples': 18570240, 'steps': 36269, 'loss/train': 1.2786180973052979} +03/05/2022 08:06:01 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/05/2022 08:06:05 - INFO - codeparrot_training - Step 36270: {'lr': 0.0004367183706791474, 'samples': 18570752, 'steps': 36270, 'loss/train': 1.9468998908996582} +03/05/2022 08:06:09 - INFO - codeparrot_training - Step 36271: {'lr': 0.0004367148418316434, 'samples': 18571264, 'steps': 36271, 'loss/train': 2.235231876373291} +03/05/2022 08:06:10 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/05/2022 08:06:14 - INFO - codeparrot_training - Step 36272: {'lr': 0.0004367113129000085, 'samples': 18571776, 'steps': 36272, 'loss/train': 1.8377752304077148} +03/05/2022 08:06:17 - INFO - codeparrot_training - Step 36273: {'lr': 0.00043670778388424434, 'samples': 18572288, 'steps': 36273, 'loss/train': 1.2235463857650757} +03/05/2022 08:06:18 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 08:06:22 - INFO - codeparrot_training - Step 36274: {'lr': 0.00043670425478435263, 'samples': 18572800, 'steps': 36274, 'loss/train': 1.6768476963043213} +03/05/2022 08:06:25 - INFO - codeparrot_training - Step 36275: {'lr': 0.00043670072560033474, 'samples': 18573312, 'steps': 36275, 'loss/train': 1.6523325443267822} +03/05/2022 08:06:27 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 08:06:31 - INFO - codeparrot_training - Step 36276: {'lr': 0.00043669719633219247, 'samples': 18573824, 'steps': 36276, 'loss/train': 1.4076370000839233} +03/05/2022 08:06:34 - INFO - codeparrot_training - Step 36277: {'lr': 0.0004366936669799273, 'samples': 18574336, 'steps': 36277, 'loss/train': 1.6886093616485596} +03/05/2022 08:06:35 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 08:06:39 - INFO - codeparrot_training - Step 36278: {'lr': 0.0004366901375435408, 'samples': 18574848, 'steps': 36278, 'loss/train': 2.163445234298706} +03/05/2022 08:06:42 - INFO - codeparrot_training - Step 36279: {'lr': 0.0004366866080230347, 'samples': 18575360, 'steps': 36279, 'loss/train': 2.326011896133423} +03/05/2022 08:06:43 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 08:06:48 - INFO - codeparrot_training - Step 36280: {'lr': 0.0004366830784184104, 'samples': 18575872, 'steps': 36280, 'loss/train': 1.7870368957519531} +03/05/2022 08:06:51 - INFO - codeparrot_training - Step 36281: {'lr': 0.00043667954872966965, 'samples': 18576384, 'steps': 36281, 'loss/train': 1.8372423648834229} +03/05/2022 08:06:52 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 08:06:56 - INFO - codeparrot_training - Step 36282: {'lr': 0.000436676018956814, 'samples': 18576896, 'steps': 36282, 'loss/train': 1.413135051727295} +03/05/2022 08:06:59 - INFO - codeparrot_training - Step 36283: {'lr': 0.0004366724890998449, 'samples': 18577408, 'steps': 36283, 'loss/train': 1.738130807876587} +03/05/2022 08:07:00 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 08:07:05 - INFO - codeparrot_training - Step 36284: {'lr': 0.00043666895915876416, 'samples': 18577920, 'steps': 36284, 'loss/train': 1.5071176290512085} +03/05/2022 08:07:08 - INFO - codeparrot_training - Step 36285: {'lr': 0.0004366654291335732, 'samples': 18578432, 'steps': 36285, 'loss/train': 1.9544986486434937} +03/05/2022 08:07:08 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/05/2022 08:07:13 - INFO - codeparrot_training - Step 36286: {'lr': 0.00043666189902427367, 'samples': 18578944, 'steps': 36286, 'loss/train': 1.8722658157348633} +03/05/2022 08:07:16 - INFO - codeparrot_training - Step 36287: {'lr': 0.00043665836883086725, 'samples': 18579456, 'steps': 36287, 'loss/train': 1.608252763748169} +03/05/2022 08:07:17 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/05/2022 08:07:21 - INFO - codeparrot_training - Step 36288: {'lr': 0.0004366548385533554, 'samples': 18579968, 'steps': 36288, 'loss/train': 1.3547999858856201} +03/05/2022 08:07:24 - INFO - codeparrot_training - Step 36289: {'lr': 0.0004366513081917398, 'samples': 18580480, 'steps': 36289, 'loss/train': 0.4692898392677307} +03/05/2022 08:07:25 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/05/2022 08:07:30 - INFO - codeparrot_training - Step 36290: {'lr': 0.00043664777774602196, 'samples': 18580992, 'steps': 36290, 'loss/train': 2.0046756267547607} +03/05/2022 08:07:33 - INFO - codeparrot_training - Step 36291: {'lr': 0.00043664424721620354, 'samples': 18581504, 'steps': 36291, 'loss/train': 2.0737833976745605} +03/05/2022 08:07:34 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 08:07:38 - INFO - codeparrot_training - Step 36292: {'lr': 0.00043664071660228605, 'samples': 18582016, 'steps': 36292, 'loss/train': 1.5705592632293701} +03/05/2022 08:07:41 - INFO - codeparrot_training - Step 36293: {'lr': 0.00043663718590427117, 'samples': 18582528, 'steps': 36293, 'loss/train': 1.6932003498077393} +03/05/2022 08:07:42 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/05/2022 08:07:47 - INFO - codeparrot_training - Step 36294: {'lr': 0.0004366336551221605, 'samples': 18583040, 'steps': 36294, 'loss/train': 1.143328070640564} +03/05/2022 08:07:50 - INFO - codeparrot_training - Step 36295: {'lr': 0.0004366301242559555, 'samples': 18583552, 'steps': 36295, 'loss/train': 1.4513932466506958} +03/05/2022 08:07:51 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 08:07:55 - INFO - codeparrot_training - Step 36296: {'lr': 0.00043662659330565793, 'samples': 18584064, 'steps': 36296, 'loss/train': 1.858394980430603} +03/05/2022 08:07:58 - INFO - codeparrot_training - Step 36297: {'lr': 0.00043662306227126917, 'samples': 18584576, 'steps': 36297, 'loss/train': 1.4701273441314697} +03/05/2022 08:07:59 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 08:08:04 - INFO - codeparrot_training - Step 36298: {'lr': 0.00043661953115279104, 'samples': 18585088, 'steps': 36298, 'loss/train': 1.7239980697631836} +03/05/2022 08:08:07 - INFO - codeparrot_training - Step 36299: {'lr': 0.000436615999950225, 'samples': 18585600, 'steps': 36299, 'loss/train': 0.7496588826179504} +03/05/2022 08:08:07 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 08:08:12 - INFO - codeparrot_training - Step 36300: {'lr': 0.0004366124686635727, 'samples': 18586112, 'steps': 36300, 'loss/train': 1.3273167610168457} +03/05/2022 08:08:15 - INFO - codeparrot_training - Step 36301: {'lr': 0.00043660893729283564, 'samples': 18586624, 'steps': 36301, 'loss/train': 1.5453933477401733} +03/05/2022 08:08:15 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 08:08:20 - INFO - codeparrot_training - Step 36302: {'lr': 0.0004366054058380155, 'samples': 18587136, 'steps': 36302, 'loss/train': 2.1349544525146484} +03/05/2022 08:08:24 - INFO - codeparrot_training - Step 36303: {'lr': 0.0004366018742991139, 'samples': 18587648, 'steps': 36303, 'loss/train': 0.9301701188087463} +03/05/2022 08:08:24 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/05/2022 08:08:29 - INFO - codeparrot_training - Step 36304: {'lr': 0.00043659834267613227, 'samples': 18588160, 'steps': 36304, 'loss/train': 0.515778660774231} +03/05/2022 08:08:32 - INFO - codeparrot_training - Step 36305: {'lr': 0.0004365948109690724, 'samples': 18588672, 'steps': 36305, 'loss/train': 1.6533077955245972} +03/05/2022 08:08:32 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/05/2022 08:08:37 - INFO - codeparrot_training - Step 36306: {'lr': 0.0004365912791779357, 'samples': 18589184, 'steps': 36306, 'loss/train': 1.8672386407852173} +03/05/2022 08:08:40 - INFO - codeparrot_training - Step 36307: {'lr': 0.00043658774730272393, 'samples': 18589696, 'steps': 36307, 'loss/train': 1.7598237991333008} +03/05/2022 08:08:40 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/05/2022 08:08:46 - INFO - codeparrot_training - Step 36308: {'lr': 0.00043658421534343856, 'samples': 18590208, 'steps': 36308, 'loss/train': 1.6849849224090576} +03/05/2022 08:08:49 - INFO - codeparrot_training - Step 36309: {'lr': 0.0004365806833000813, 'samples': 18590720, 'steps': 36309, 'loss/train': 0.6293301582336426} +03/05/2022 08:08:49 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/05/2022 08:08:54 - INFO - codeparrot_training - Step 36310: {'lr': 0.0004365771511726535, 'samples': 18591232, 'steps': 36310, 'loss/train': 2.0196645259857178} +03/05/2022 08:08:57 - INFO - codeparrot_training - Step 36311: {'lr': 0.00043657361896115706, 'samples': 18591744, 'steps': 36311, 'loss/train': 1.78727126121521} +03/05/2022 08:08:57 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/05/2022 08:09:03 - INFO - codeparrot_training - Step 36312: {'lr': 0.0004365700866655934, 'samples': 18592256, 'steps': 36312, 'loss/train': 1.4854425191879272} +03/05/2022 08:09:06 - INFO - codeparrot_training - Step 36313: {'lr': 0.00043656655428596407, 'samples': 18592768, 'steps': 36313, 'loss/train': 0.8980157971382141} +03/05/2022 08:09:06 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/05/2022 08:09:11 - INFO - codeparrot_training - Step 36314: {'lr': 0.0004365630218222708, 'samples': 18593280, 'steps': 36314, 'loss/train': 1.9378106594085693} +03/05/2022 08:09:14 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 08:09:16 - INFO - codeparrot_training - Step 36315: {'lr': 0.00043655948927451505, 'samples': 18593792, 'steps': 36315, 'loss/train': 2.2859151363372803} +03/05/2022 08:09:20 - INFO - codeparrot_training - Step 36316: {'lr': 0.0004365559566426985, 'samples': 18594304, 'steps': 36316, 'loss/train': 1.8438653945922852} +03/05/2022 08:09:22 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/05/2022 08:09:25 - INFO - codeparrot_training - Step 36317: {'lr': 0.0004365524239268227, 'samples': 18594816, 'steps': 36317, 'loss/train': 0.5921840071678162} +03/05/2022 08:09:28 - INFO - codeparrot_training - Step 36318: {'lr': 0.00043654889112688933, 'samples': 18595328, 'steps': 36318, 'loss/train': 1.5405128002166748} +03/05/2022 08:09:30 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/05/2022 08:09:33 - INFO - codeparrot_training - Step 36319: {'lr': 0.00043654535824289985, 'samples': 18595840, 'steps': 36319, 'loss/train': 1.8545513153076172} +03/05/2022 08:09:36 - INFO - codeparrot_training - Step 36320: {'lr': 0.0004365418252748559, 'samples': 18596352, 'steps': 36320, 'loss/train': 1.483264684677124} +03/05/2022 08:09:39 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/05/2022 08:09:42 - INFO - codeparrot_training - Step 36321: {'lr': 0.0004365382922227591, 'samples': 18596864, 'steps': 36321, 'loss/train': 1.6875332593917847} +03/05/2022 08:09:45 - INFO - codeparrot_training - Step 36322: {'lr': 0.000436534759086611, 'samples': 18597376, 'steps': 36322, 'loss/train': 1.8684985637664795} +03/05/2022 08:09:47 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 08:09:51 - INFO - codeparrot_training - Step 36323: {'lr': 0.00043653122586641323, 'samples': 18597888, 'steps': 36323, 'loss/train': 1.4339921474456787} +03/05/2022 08:09:54 - INFO - codeparrot_training - Step 36324: {'lr': 0.0004365276925621674, 'samples': 18598400, 'steps': 36324, 'loss/train': 1.404528260231018} +03/05/2022 08:09:57 - INFO - codeparrot_training - Step 36325: {'lr': 0.0004365241591738751, 'samples': 18598912, 'steps': 36325, 'loss/train': 1.858699083328247} +03/05/2022 08:09:57 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/05/2022 08:10:02 - INFO - codeparrot_training - Step 36326: {'lr': 0.0004365206257015378, 'samples': 18599424, 'steps': 36326, 'loss/train': 1.9349026679992676} +03/05/2022 08:10:05 - INFO - codeparrot_training - Step 36327: {'lr': 0.0004365170921451572, 'samples': 18599936, 'steps': 36327, 'loss/train': 1.8397786617279053} +03/05/2022 08:10:05 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/05/2022 08:10:11 - INFO - codeparrot_training - Step 36328: {'lr': 0.00043651355850473495, 'samples': 18600448, 'steps': 36328, 'loss/train': 0.9702200889587402} +03/05/2022 08:10:14 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 08:10:16 - INFO - codeparrot_training - Step 36329: {'lr': 0.0004365100247802725, 'samples': 18600960, 'steps': 36329, 'loss/train': 1.6092218160629272} +03/05/2022 08:10:19 - INFO - codeparrot_training - Step 36330: {'lr': 0.0004365064909717715, 'samples': 18601472, 'steps': 36330, 'loss/train': 2.0911831855773926} +03/05/2022 08:10:22 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 08:10:24 - INFO - codeparrot_training - Step 36331: {'lr': 0.0004365029570792336, 'samples': 18601984, 'steps': 36331, 'loss/train': 2.138514518737793} +03/05/2022 08:10:28 - INFO - codeparrot_training - Step 36332: {'lr': 0.00043649942310266035, 'samples': 18602496, 'steps': 36332, 'loss/train': 1.4679571390151978} +03/05/2022 08:10:30 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 08:10:33 - INFO - codeparrot_training - Step 36333: {'lr': 0.00043649588904205326, 'samples': 18603008, 'steps': 36333, 'loss/train': 1.8132022619247437} +03/05/2022 08:10:36 - INFO - codeparrot_training - Step 36334: {'lr': 0.0004364923548974141, 'samples': 18603520, 'steps': 36334, 'loss/train': 1.5014220476150513} +03/05/2022 08:10:38 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/05/2022 08:10:41 - INFO - codeparrot_training - Step 36335: {'lr': 0.0004364888206687443, 'samples': 18604032, 'steps': 36335, 'loss/train': 2.2461609840393066} +03/05/2022 08:10:44 - INFO - codeparrot_training - Step 36336: {'lr': 0.00043648528635604556, 'samples': 18604544, 'steps': 36336, 'loss/train': 1.0545272827148438} +03/05/2022 08:10:47 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 08:10:50 - INFO - codeparrot_training - Step 36337: {'lr': 0.00043648175195931937, 'samples': 18605056, 'steps': 36337, 'loss/train': 1.5806584358215332} +03/05/2022 08:10:53 - INFO - codeparrot_training - Step 36338: {'lr': 0.0004364782174785674, 'samples': 18605568, 'steps': 36338, 'loss/train': 1.6582310199737549} +03/05/2022 08:10:55 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/05/2022 08:10:58 - INFO - codeparrot_training - Step 36339: {'lr': 0.0004364746829137912, 'samples': 18606080, 'steps': 36339, 'loss/train': 2.021883726119995} +03/05/2022 08:11:01 - INFO - codeparrot_training - Step 36340: {'lr': 0.0004364711482649925, 'samples': 18606592, 'steps': 36340, 'loss/train': 3.045445680618286} +03/05/2022 08:11:05 - INFO - codeparrot_training - Step 36341: {'lr': 0.00043646761353217266, 'samples': 18607104, 'steps': 36341, 'loss/train': 1.5278230905532837} +03/05/2022 08:11:05 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 08:11:10 - INFO - codeparrot_training - Step 36342: {'lr': 0.0004364640787153334, 'samples': 18607616, 'steps': 36342, 'loss/train': 2.2994656562805176} +03/05/2022 08:11:13 - INFO - codeparrot_training - Step 36343: {'lr': 0.0004364605438144764, 'samples': 18608128, 'steps': 36343, 'loss/train': 2.1982994079589844} +03/05/2022 08:11:13 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 08:11:19 - INFO - codeparrot_training - Step 36344: {'lr': 0.000436457008829603, 'samples': 18608640, 'steps': 36344, 'loss/train': 1.7848387956619263} +03/05/2022 08:11:22 - INFO - codeparrot_training - Step 36345: {'lr': 0.00043645347376071507, 'samples': 18609152, 'steps': 36345, 'loss/train': 1.7005219459533691} +03/05/2022 08:11:22 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 08:11:27 - INFO - codeparrot_training - Step 36346: {'lr': 0.0004364499386078141, 'samples': 18609664, 'steps': 36346, 'loss/train': 0.48749107122421265} +03/05/2022 08:11:30 - INFO - codeparrot_training - Step 36347: {'lr': 0.00043644640337090157, 'samples': 18610176, 'steps': 36347, 'loss/train': 1.3196015357971191} +03/05/2022 08:11:31 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 08:11:36 - INFO - codeparrot_training - Step 36348: {'lr': 0.0004364428680499792, 'samples': 18610688, 'steps': 36348, 'loss/train': 1.7109352350234985} +03/05/2022 08:11:39 - INFO - codeparrot_training - Step 36349: {'lr': 0.0004364393326450486, 'samples': 18611200, 'steps': 36349, 'loss/train': 0.6692292094230652} +03/05/2022 08:11:40 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 08:11:44 - INFO - codeparrot_training - Step 36350: {'lr': 0.00043643579715611124, 'samples': 18611712, 'steps': 36350, 'loss/train': 1.216480016708374} +03/05/2022 08:11:47 - INFO - codeparrot_training - Step 36351: {'lr': 0.00043643226158316886, 'samples': 18612224, 'steps': 36351, 'loss/train': 1.5163134336471558} +03/05/2022 08:11:48 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/05/2022 08:11:53 - INFO - codeparrot_training - Step 36352: {'lr': 0.00043642872592622293, 'samples': 18612736, 'steps': 36352, 'loss/train': 1.9495559930801392} +03/05/2022 08:11:56 - INFO - codeparrot_training - Step 36353: {'lr': 0.0004364251901852751, 'samples': 18613248, 'steps': 36353, 'loss/train': 1.3938871622085571} +03/05/2022 08:11:57 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/05/2022 08:12:01 - INFO - codeparrot_training - Step 36354: {'lr': 0.000436421654360327, 'samples': 18613760, 'steps': 36354, 'loss/train': 1.9398480653762817} +03/05/2022 08:12:04 - INFO - codeparrot_training - Step 36355: {'lr': 0.00043641811845138016, 'samples': 18614272, 'steps': 36355, 'loss/train': 1.7702665328979492} +03/05/2022 08:12:05 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/05/2022 08:12:10 - INFO - codeparrot_training - Step 36356: {'lr': 0.0004364145824584361, 'samples': 18614784, 'steps': 36356, 'loss/train': 1.406023383140564} +03/05/2022 08:12:13 - INFO - codeparrot_training - Step 36357: {'lr': 0.00043641104638149656, 'samples': 18615296, 'steps': 36357, 'loss/train': 1.6168336868286133} +03/05/2022 08:12:13 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 08:12:18 - INFO - codeparrot_training - Step 36358: {'lr': 0.00043640751022056316, 'samples': 18615808, 'steps': 36358, 'loss/train': 0.1653549075126648} +03/05/2022 08:12:21 - INFO - codeparrot_training - Step 36359: {'lr': 0.00043640397397563737, 'samples': 18616320, 'steps': 36359, 'loss/train': 1.7235809564590454} +03/05/2022 08:12:22 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/05/2022 08:12:27 - INFO - codeparrot_training - Step 36360: {'lr': 0.00043640043764672077, 'samples': 18616832, 'steps': 36360, 'loss/train': 2.0967001914978027} +03/05/2022 08:12:30 - INFO - codeparrot_training - Step 36361: {'lr': 0.00043639690123381503, 'samples': 18617344, 'steps': 36361, 'loss/train': 1.8273653984069824} +03/05/2022 08:12:30 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/05/2022 08:12:35 - INFO - codeparrot_training - Step 36362: {'lr': 0.00043639336473692174, 'samples': 18617856, 'steps': 36362, 'loss/train': 1.6305148601531982} +03/05/2022 08:12:38 - INFO - codeparrot_training - Step 36363: {'lr': 0.00043638982815604247, 'samples': 18618368, 'steps': 36363, 'loss/train': 1.1789758205413818} +03/05/2022 08:12:39 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 08:12:44 - INFO - codeparrot_training - Step 36364: {'lr': 0.00043638629149117883, 'samples': 18618880, 'steps': 36364, 'loss/train': 0.12033513933420181} +03/05/2022 08:12:47 - INFO - codeparrot_training - Step 36365: {'lr': 0.0004363827547423324, 'samples': 18619392, 'steps': 36365, 'loss/train': 1.8419135808944702} +03/05/2022 08:12:47 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/05/2022 08:12:52 - INFO - codeparrot_training - Step 36366: {'lr': 0.00043637921790950476, 'samples': 18619904, 'steps': 36366, 'loss/train': 1.9991563558578491} +03/05/2022 08:12:55 - INFO - codeparrot_training - Step 36367: {'lr': 0.00043637568099269753, 'samples': 18620416, 'steps': 36367, 'loss/train': 1.2050448656082153} +03/05/2022 08:12:55 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 08:13:01 - INFO - codeparrot_training - Step 36368: {'lr': 0.00043637214399191234, 'samples': 18620928, 'steps': 36368, 'loss/train': 0.7002370953559875} +03/05/2022 08:13:04 - INFO - codeparrot_training - Step 36369: {'lr': 0.00043636860690715064, 'samples': 18621440, 'steps': 36369, 'loss/train': 1.7460111379623413} +03/05/2022 08:13:04 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/05/2022 08:13:09 - INFO - codeparrot_training - Step 36370: {'lr': 0.00043636506973841424, 'samples': 18621952, 'steps': 36370, 'loss/train': 1.8994289636611938} +03/05/2022 08:13:12 - INFO - codeparrot_training - Step 36371: {'lr': 0.00043636153248570453, 'samples': 18622464, 'steps': 36371, 'loss/train': 1.7057499885559082} +03/05/2022 08:13:13 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/05/2022 08:13:18 - INFO - codeparrot_training - Step 36372: {'lr': 0.0004363579951490232, 'samples': 18622976, 'steps': 36372, 'loss/train': 1.9976303577423096} +03/05/2022 08:13:21 - INFO - codeparrot_training - Step 36373: {'lr': 0.0004363544577283718, 'samples': 18623488, 'steps': 36373, 'loss/train': 1.1317470073699951} +03/05/2022 08:13:21 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/05/2022 08:13:26 - INFO - codeparrot_training - Step 36374: {'lr': 0.0004363509202237521, 'samples': 18624000, 'steps': 36374, 'loss/train': 1.1167805194854736} +03/05/2022 08:13:30 - INFO - codeparrot_training - Step 36375: {'lr': 0.0004363473826351654, 'samples': 18624512, 'steps': 36375, 'loss/train': 1.3876641988754272} +03/05/2022 08:13:30 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/05/2022 08:13:35 - INFO - codeparrot_training - Step 36376: {'lr': 0.0004363438449626135, 'samples': 18625024, 'steps': 36376, 'loss/train': 1.874290943145752} +03/05/2022 08:13:38 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/05/2022 08:13:40 - INFO - codeparrot_training - Step 36377: {'lr': 0.000436340307206098, 'samples': 18625536, 'steps': 36377, 'loss/train': 1.3832041025161743} +03/05/2022 08:13:43 - INFO - codeparrot_training - Step 36378: {'lr': 0.00043633676936562026, 'samples': 18626048, 'steps': 36378, 'loss/train': 1.6721936464309692} +03/05/2022 08:13:46 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/05/2022 08:13:49 - INFO - codeparrot_training - Step 36379: {'lr': 0.0004363332314411822, 'samples': 18626560, 'steps': 36379, 'loss/train': 1.7718744277954102} +03/05/2022 08:13:52 - INFO - codeparrot_training - Step 36380: {'lr': 0.0004363296934327852, 'samples': 18627072, 'steps': 36380, 'loss/train': 1.2038774490356445} +03/05/2022 08:13:55 - INFO - codeparrot_training - Step 36381: {'lr': 0.00043632615534043096, 'samples': 18627584, 'steps': 36381, 'loss/train': 1.9615819454193115} +03/05/2022 08:13:55 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/05/2022 08:14:01 - INFO - codeparrot_training - Step 36382: {'lr': 0.00043632261716412097, 'samples': 18628096, 'steps': 36382, 'loss/train': 2.1924550533294678} +03/05/2022 08:14:04 - INFO - codeparrot_training - Step 36383: {'lr': 0.0004363190789038569, 'samples': 18628608, 'steps': 36383, 'loss/train': 0.7029555439949036} +03/05/2022 08:14:04 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/05/2022 08:14:09 - INFO - codeparrot_training - Step 36384: {'lr': 0.0004363155405596404, 'samples': 18629120, 'steps': 36384, 'loss/train': 1.7869279384613037} +03/05/2022 08:14:12 - INFO - codeparrot_training - Step 36385: {'lr': 0.00043631200213147296, 'samples': 18629632, 'steps': 36385, 'loss/train': 2.1007018089294434} +03/05/2022 08:14:13 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/05/2022 08:14:18 - INFO - codeparrot_training - Step 36386: {'lr': 0.0004363084636193561, 'samples': 18630144, 'steps': 36386, 'loss/train': 1.021134614944458} +03/05/2022 08:14:21 - INFO - codeparrot_training - Step 36387: {'lr': 0.0004363049250232917, 'samples': 18630656, 'steps': 36387, 'loss/train': 1.9525604248046875} +03/05/2022 08:14:21 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/05/2022 08:14:26 - INFO - codeparrot_training - Step 36388: {'lr': 0.000436301386343281, 'samples': 18631168, 'steps': 36388, 'loss/train': 1.8891663551330566} +03/05/2022 08:14:29 - INFO - codeparrot_training - Step 36389: {'lr': 0.0004362978475793259, 'samples': 18631680, 'steps': 36389, 'loss/train': 1.3433079719543457} +03/05/2022 08:14:35 - INFO - codeparrot_training - Step 36390: {'lr': 0.00043629430873142773, 'samples': 18632192, 'steps': 36390, 'loss/train': 1.395236849784851} +03/05/2022 08:14:38 - INFO - codeparrot_training - Step 36391: {'lr': 0.00043629076979958837, 'samples': 18632704, 'steps': 36391, 'loss/train': 2.234822988510132} +03/05/2022 08:14:38 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/05/2022 08:14:43 - INFO - codeparrot_training - Step 36392: {'lr': 0.00043628723078380916, 'samples': 18633216, 'steps': 36392, 'loss/train': 1.5440551042556763} +03/05/2022 08:14:46 - INFO - codeparrot_training - Step 36393: {'lr': 0.0004362836916840919, 'samples': 18633728, 'steps': 36393, 'loss/train': 1.6897200345993042} +03/05/2022 08:14:46 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/05/2022 08:14:51 - INFO - codeparrot_training - Step 36394: {'lr': 0.00043628015250043794, 'samples': 18634240, 'steps': 36394, 'loss/train': 2.107179880142212} +03/05/2022 08:14:55 - INFO - codeparrot_training - Step 36395: {'lr': 0.00043627661323284914, 'samples': 18634752, 'steps': 36395, 'loss/train': 1.2568882703781128} +03/05/2022 08:14:55 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 08:15:00 - INFO - codeparrot_training - Step 36396: {'lr': 0.00043627307388132693, 'samples': 18635264, 'steps': 36396, 'loss/train': 0.2920355796813965} +03/05/2022 08:15:04 - INFO - codeparrot_training - Step 36397: {'lr': 0.0004362695344458729, 'samples': 18635776, 'steps': 36397, 'loss/train': 1.8889368772506714} +03/05/2022 08:15:06 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 08:15:09 - INFO - codeparrot_training - Step 36398: {'lr': 0.00043626599492648877, 'samples': 18636288, 'steps': 36398, 'loss/train': 1.6631898880004883} +03/05/2022 08:15:12 - INFO - codeparrot_training - Step 36399: {'lr': 0.000436262455323176, 'samples': 18636800, 'steps': 36399, 'loss/train': 0.8209893107414246} +03/05/2022 08:15:15 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/05/2022 08:15:17 - INFO - codeparrot_training - Step 36400: {'lr': 0.0004362589156359363, 'samples': 18637312, 'steps': 36400, 'loss/train': 1.342013955116272} +03/05/2022 08:15:21 - INFO - codeparrot_training - Step 36401: {'lr': 0.00043625537586477114, 'samples': 18637824, 'steps': 36401, 'loss/train': 1.9050298929214478} +03/05/2022 08:15:23 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/05/2022 08:15:26 - INFO - codeparrot_training - Step 36402: {'lr': 0.00043625183600968224, 'samples': 18638336, 'steps': 36402, 'loss/train': 1.2861864566802979} +03/05/2022 08:15:29 - INFO - codeparrot_training - Step 36403: {'lr': 0.00043624829607067105, 'samples': 18638848, 'steps': 36403, 'loss/train': 3.06931209564209} +03/05/2022 08:15:31 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 08:15:34 - INFO - codeparrot_training - Step 36404: {'lr': 0.0004362447560477394, 'samples': 18639360, 'steps': 36404, 'loss/train': 1.612502098083496} +03/05/2022 08:15:38 - INFO - codeparrot_training - Step 36405: {'lr': 0.0004362412159408886, 'samples': 18639872, 'steps': 36405, 'loss/train': 2.355491876602173} +03/05/2022 08:15:39 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 08:15:43 - INFO - codeparrot_training - Step 36406: {'lr': 0.0004362376757501205, 'samples': 18640384, 'steps': 36406, 'loss/train': 1.8124324083328247} +03/05/2022 08:15:46 - INFO - codeparrot_training - Step 36407: {'lr': 0.00043623413547543645, 'samples': 18640896, 'steps': 36407, 'loss/train': 2.078056812286377} +03/05/2022 08:15:48 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/05/2022 08:15:51 - INFO - codeparrot_training - Step 36408: {'lr': 0.00043623059511683826, 'samples': 18641408, 'steps': 36408, 'loss/train': 1.318961501121521} +03/05/2022 08:15:54 - INFO - codeparrot_training - Step 36409: {'lr': 0.0004362270546743274, 'samples': 18641920, 'steps': 36409, 'loss/train': 1.718108892440796} +03/05/2022 08:15:56 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/05/2022 08:16:00 - INFO - codeparrot_training - Step 36410: {'lr': 0.0004362235141479055, 'samples': 18642432, 'steps': 36410, 'loss/train': 1.808218002319336} +03/05/2022 08:16:03 - INFO - codeparrot_training - Step 36411: {'lr': 0.0004362199735375742, 'samples': 18642944, 'steps': 36411, 'loss/train': 1.0177865028381348} +03/05/2022 08:16:04 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 08:16:08 - INFO - codeparrot_training - Step 36412: {'lr': 0.000436216432843335, 'samples': 18643456, 'steps': 36412, 'loss/train': 1.6219316720962524} +03/05/2022 08:16:11 - INFO - codeparrot_training - Step 36413: {'lr': 0.00043621289206518957, 'samples': 18643968, 'steps': 36413, 'loss/train': 2.119952917098999} +03/05/2022 08:16:12 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/05/2022 08:16:17 - INFO - codeparrot_training - Step 36414: {'lr': 0.00043620935120313955, 'samples': 18644480, 'steps': 36414, 'loss/train': 2.138293981552124} +03/05/2022 08:16:20 - INFO - codeparrot_training - Step 36415: {'lr': 0.0004362058102571864, 'samples': 18644992, 'steps': 36415, 'loss/train': 1.6566400527954102} +03/05/2022 08:16:23 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 08:16:25 - INFO - codeparrot_training - Step 36416: {'lr': 0.00043620226922733174, 'samples': 18645504, 'steps': 36416, 'loss/train': 1.754294991493225} +03/05/2022 08:16:28 - INFO - codeparrot_training - Step 36417: {'lr': 0.0004361987281135773, 'samples': 18646016, 'steps': 36417, 'loss/train': 1.889129877090454} +03/05/2022 08:16:31 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 08:16:34 - INFO - codeparrot_training - Step 36418: {'lr': 0.00043619518691592453, 'samples': 18646528, 'steps': 36418, 'loss/train': 1.7421029806137085} +03/05/2022 08:16:37 - INFO - codeparrot_training - Step 36419: {'lr': 0.00043619164563437506, 'samples': 18647040, 'steps': 36419, 'loss/train': 2.045658826828003} +03/05/2022 08:16:39 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 08:16:42 - INFO - codeparrot_training - Step 36420: {'lr': 0.0004361881042689306, 'samples': 18647552, 'steps': 36420, 'loss/train': 1.760764241218567} +03/05/2022 08:16:45 - INFO - codeparrot_training - Step 36421: {'lr': 0.00043618456281959263, 'samples': 18648064, 'steps': 36421, 'loss/train': 1.3869388103485107} +03/05/2022 08:16:48 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/05/2022 08:16:50 - INFO - codeparrot_training - Step 36422: {'lr': 0.0004361810212863627, 'samples': 18648576, 'steps': 36422, 'loss/train': 2.0745608806610107} +03/05/2022 08:16:54 - INFO - codeparrot_training - Step 36423: {'lr': 0.0004361774796692425, 'samples': 18649088, 'steps': 36423, 'loss/train': 1.7216525077819824} +03/05/2022 08:16:56 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/05/2022 08:16:59 - INFO - codeparrot_training - Step 36424: {'lr': 0.00043617393796823367, 'samples': 18649600, 'steps': 36424, 'loss/train': 2.081608533859253} +03/05/2022 08:17:03 - INFO - codeparrot_training - Step 36425: {'lr': 0.00043617039618333765, 'samples': 18650112, 'steps': 36425, 'loss/train': 1.8802214860916138} +03/05/2022 08:17:06 - INFO - codeparrot_training - Step 36426: {'lr': 0.00043616685431455615, 'samples': 18650624, 'steps': 36426, 'loss/train': 0.8684747219085693} +03/05/2022 08:17:06 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/05/2022 08:17:11 - INFO - codeparrot_training - Step 36427: {'lr': 0.0004361633123618908, 'samples': 18651136, 'steps': 36427, 'loss/train': 1.3209688663482666} +03/05/2022 08:17:14 - INFO - codeparrot_training - Step 36428: {'lr': 0.00043615977032534305, 'samples': 18651648, 'steps': 36428, 'loss/train': 1.95578134059906} +03/05/2022 08:17:15 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/05/2022 08:17:19 - INFO - codeparrot_training - Step 36429: {'lr': 0.00043615622820491464, 'samples': 18652160, 'steps': 36429, 'loss/train': 0.7057967782020569} +03/05/2022 08:17:23 - INFO - codeparrot_training - Step 36430: {'lr': 0.00043615268600060705, 'samples': 18652672, 'steps': 36430, 'loss/train': 1.0346256494522095} +03/05/2022 08:17:23 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 08:17:28 - INFO - codeparrot_training - Step 36431: {'lr': 0.000436149143712422, 'samples': 18653184, 'steps': 36431, 'loss/train': 2.3508975505828857} +03/05/2022 08:17:31 - INFO - codeparrot_training - Step 36432: {'lr': 0.0004361456013403609, 'samples': 18653696, 'steps': 36432, 'loss/train': 2.1107289791107178} +03/05/2022 08:17:31 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/05/2022 08:17:36 - INFO - codeparrot_training - Step 36433: {'lr': 0.00043614205888442553, 'samples': 18654208, 'steps': 36433, 'loss/train': 1.9675672054290771} +03/05/2022 08:17:40 - INFO - codeparrot_training - Step 36434: {'lr': 0.00043613851634461743, 'samples': 18654720, 'steps': 36434, 'loss/train': 1.5066264867782593} +03/05/2022 08:17:40 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/05/2022 08:17:45 - INFO - codeparrot_training - Step 36435: {'lr': 0.00043613497372093827, 'samples': 18655232, 'steps': 36435, 'loss/train': 1.3592115640640259} +03/05/2022 08:17:48 - INFO - codeparrot_training - Step 36436: {'lr': 0.0004361314310133894, 'samples': 18655744, 'steps': 36436, 'loss/train': 1.8794151544570923} +03/05/2022 08:17:48 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/05/2022 08:17:53 - INFO - codeparrot_training - Step 36437: {'lr': 0.00043612788822197266, 'samples': 18656256, 'steps': 36437, 'loss/train': 0.9860595464706421} +03/05/2022 08:17:56 - INFO - codeparrot_training - Step 36438: {'lr': 0.0004361243453466896, 'samples': 18656768, 'steps': 36438, 'loss/train': 2.216850996017456} +03/05/2022 08:17:56 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 08:18:02 - INFO - codeparrot_training - Step 36439: {'lr': 0.0004361208023875417, 'samples': 18657280, 'steps': 36439, 'loss/train': 1.891960859298706} +03/05/2022 08:18:05 - INFO - codeparrot_training - Step 36440: {'lr': 0.00043611725934453074, 'samples': 18657792, 'steps': 36440, 'loss/train': 1.984387755393982} +03/05/2022 08:18:05 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 08:18:10 - INFO - codeparrot_training - Step 36441: {'lr': 0.00043611371621765817, 'samples': 18658304, 'steps': 36441, 'loss/train': 2.078974723815918} +03/05/2022 08:18:13 - INFO - codeparrot_training - Step 36442: {'lr': 0.0004361101730069256, 'samples': 18658816, 'steps': 36442, 'loss/train': 2.079745292663574} +03/05/2022 08:18:14 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/05/2022 08:18:19 - INFO - codeparrot_training - Step 36443: {'lr': 0.00043610662971233465, 'samples': 18659328, 'steps': 36443, 'loss/train': 2.5560784339904785} +03/05/2022 08:18:22 - INFO - codeparrot_training - Step 36444: {'lr': 0.00043610308633388695, 'samples': 18659840, 'steps': 36444, 'loss/train': 1.658698320388794} +03/05/2022 08:18:22 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/05/2022 08:18:27 - INFO - codeparrot_training - Step 36445: {'lr': 0.0004360995428715841, 'samples': 18660352, 'steps': 36445, 'loss/train': 1.7069393396377563} +03/05/2022 08:18:31 - INFO - codeparrot_training - Step 36446: {'lr': 0.00043609599932542764, 'samples': 18660864, 'steps': 36446, 'loss/train': 1.7503266334533691} +03/05/2022 08:18:31 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 08:18:36 - INFO - codeparrot_training - Step 36447: {'lr': 0.00043609245569541924, 'samples': 18661376, 'steps': 36447, 'loss/train': 2.0942459106445312} +03/05/2022 08:18:39 - INFO - codeparrot_training - Step 36448: {'lr': 0.00043608891198156037, 'samples': 18661888, 'steps': 36448, 'loss/train': 2.0832314491271973} +03/05/2022 08:18:40 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/05/2022 08:18:44 - INFO - codeparrot_training - Step 36449: {'lr': 0.0004360853681838528, 'samples': 18662400, 'steps': 36449, 'loss/train': 2.1019678115844727} +03/05/2022 08:18:48 - INFO - codeparrot_training - Step 36450: {'lr': 0.0004360818243022979, 'samples': 18662912, 'steps': 36450, 'loss/train': 1.008339762687683} +03/05/2022 08:18:48 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 08:18:53 - INFO - codeparrot_training - Step 36451: {'lr': 0.00043607828033689753, 'samples': 18663424, 'steps': 36451, 'loss/train': 2.0179286003112793} +03/05/2022 08:18:56 - INFO - codeparrot_training - Step 36452: {'lr': 0.000436074736287653, 'samples': 18663936, 'steps': 36452, 'loss/train': 1.8818203210830688} +03/05/2022 08:18:56 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/05/2022 08:19:01 - INFO - codeparrot_training - Step 36453: {'lr': 0.00043607119215456625, 'samples': 18664448, 'steps': 36453, 'loss/train': 1.5660171508789062} +03/05/2022 08:19:05 - INFO - codeparrot_training - Step 36454: {'lr': 0.00043606764793763865, 'samples': 18664960, 'steps': 36454, 'loss/train': 2.4266164302825928} +03/05/2022 08:19:05 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 08:19:10 - INFO - codeparrot_training - Step 36455: {'lr': 0.00043606410363687177, 'samples': 18665472, 'steps': 36455, 'loss/train': 1.7144031524658203} +03/05/2022 08:19:13 - INFO - codeparrot_training - Step 36456: {'lr': 0.00043606055925226727, 'samples': 18665984, 'steps': 36456, 'loss/train': 1.6253248453140259} +03/05/2022 08:19:13 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/05/2022 08:19:18 - INFO - codeparrot_training - Step 36457: {'lr': 0.0004360570147838269, 'samples': 18666496, 'steps': 36457, 'loss/train': 1.2984052896499634} +03/05/2022 08:19:21 - INFO - codeparrot_training - Step 36458: {'lr': 0.00043605347023155193, 'samples': 18667008, 'steps': 36458, 'loss/train': 1.856462836265564} +03/05/2022 08:19:21 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 08:19:27 - INFO - codeparrot_training - Step 36459: {'lr': 0.0004360499255954442, 'samples': 18667520, 'steps': 36459, 'loss/train': 2.132575273513794} +03/05/2022 08:19:30 - INFO - codeparrot_training - Step 36460: {'lr': 0.0004360463808755053, 'samples': 18668032, 'steps': 36460, 'loss/train': 1.800743579864502} +03/05/2022 08:19:31 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 08:19:35 - INFO - codeparrot_training - Step 36461: {'lr': 0.00043604283607173673, 'samples': 18668544, 'steps': 36461, 'loss/train': 2.295856237411499} +03/05/2022 08:19:39 - INFO - codeparrot_training - Step 36462: {'lr': 0.0004360392911841401, 'samples': 18669056, 'steps': 36462, 'loss/train': 2.181933879852295} +03/05/2022 08:19:39 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 08:19:44 - INFO - codeparrot_training - Step 36463: {'lr': 0.0004360357462127171, 'samples': 18669568, 'steps': 36463, 'loss/train': 2.0423195362091064} +03/05/2022 08:19:47 - INFO - codeparrot_training - Step 36464: {'lr': 0.0004360322011574692, 'samples': 18670080, 'steps': 36464, 'loss/train': 1.2879196405410767} +03/05/2022 08:19:48 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/05/2022 08:19:52 - INFO - codeparrot_training - Step 36465: {'lr': 0.00043602865601839817, 'samples': 18670592, 'steps': 36465, 'loss/train': 2.839481830596924} +03/05/2022 08:19:56 - INFO - codeparrot_training - Step 36466: {'lr': 0.00043602511079550535, 'samples': 18671104, 'steps': 36466, 'loss/train': 1.4449669122695923} +03/05/2022 08:19:56 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/05/2022 08:20:01 - INFO - codeparrot_training - Step 36467: {'lr': 0.0004360215654887926, 'samples': 18671616, 'steps': 36467, 'loss/train': 0.723706066608429} +03/05/2022 08:20:04 - INFO - codeparrot_training - Step 36468: {'lr': 0.0004360180200982613, 'samples': 18672128, 'steps': 36468, 'loss/train': 2.1930384635925293} +03/05/2022 08:20:04 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/05/2022 08:20:09 - INFO - codeparrot_training - Step 36469: {'lr': 0.00043601447462391317, 'samples': 18672640, 'steps': 36469, 'loss/train': 0.9871241450309753} +03/05/2022 08:20:12 - INFO - codeparrot_training - Step 36470: {'lr': 0.00043601092906574986, 'samples': 18673152, 'steps': 36470, 'loss/train': 1.6151540279388428} +03/05/2022 08:20:13 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 08:20:18 - INFO - codeparrot_training - Step 36471: {'lr': 0.0004360073834237729, 'samples': 18673664, 'steps': 36471, 'loss/train': 2.4731557369232178} +03/05/2022 08:20:21 - INFO - codeparrot_training - Step 36472: {'lr': 0.0004360038376979838, 'samples': 18674176, 'steps': 36472, 'loss/train': 1.1814765930175781} +03/05/2022 08:20:21 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 08:20:26 - INFO - codeparrot_training - Step 36473: {'lr': 0.0004360002918883843, 'samples': 18674688, 'steps': 36473, 'loss/train': 2.267200469970703} +03/05/2022 08:20:29 - INFO - codeparrot_training - Step 36474: {'lr': 0.00043599674599497593, 'samples': 18675200, 'steps': 36474, 'loss/train': 1.9048576354980469} +03/05/2022 08:20:29 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/05/2022 08:20:35 - INFO - codeparrot_training - Step 36475: {'lr': 0.00043599320001776025, 'samples': 18675712, 'steps': 36475, 'loss/train': 1.8426743745803833} +03/05/2022 08:20:38 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 08:20:40 - INFO - codeparrot_training - Step 36476: {'lr': 0.00043598965395673893, 'samples': 18676224, 'steps': 36476, 'loss/train': 0.9498752355575562} +03/05/2022 08:20:43 - INFO - codeparrot_training - Step 36477: {'lr': 0.0004359861078119136, 'samples': 18676736, 'steps': 36477, 'loss/train': 2.2847163677215576} +03/05/2022 08:20:46 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/05/2022 08:20:48 - INFO - codeparrot_training - Step 36478: {'lr': 0.00043598256158328575, 'samples': 18677248, 'steps': 36478, 'loss/train': 2.398782968521118} +03/05/2022 08:20:52 - INFO - codeparrot_training - Step 36479: {'lr': 0.00043597901527085703, 'samples': 18677760, 'steps': 36479, 'loss/train': 2.351871967315674} +03/05/2022 08:20:54 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/05/2022 08:20:57 - INFO - codeparrot_training - Step 36480: {'lr': 0.000435975468874629, 'samples': 18678272, 'steps': 36480, 'loss/train': 2.591935396194458} +03/05/2022 08:21:00 - INFO - codeparrot_training - Step 36481: {'lr': 0.00043597192239460336, 'samples': 18678784, 'steps': 36481, 'loss/train': 1.418365716934204} +03/05/2022 08:21:03 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 08:21:05 - INFO - codeparrot_training - Step 36482: {'lr': 0.00043596837583078165, 'samples': 18679296, 'steps': 36482, 'loss/train': 1.7946773767471313} +03/05/2022 08:21:09 - INFO - codeparrot_training - Step 36483: {'lr': 0.0004359648291831654, 'samples': 18679808, 'steps': 36483, 'loss/train': 2.006937026977539} +03/05/2022 08:21:11 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 08:21:14 - INFO - codeparrot_training - Step 36484: {'lr': 0.0004359612824517563, 'samples': 18680320, 'steps': 36484, 'loss/train': 1.4025856256484985} +03/05/2022 08:21:17 - INFO - codeparrot_training - Step 36485: {'lr': 0.0004359577356365559, 'samples': 18680832, 'steps': 36485, 'loss/train': 1.3273124694824219} +03/05/2022 08:21:20 - INFO - codeparrot_training - Step 36486: {'lr': 0.00043595418873756584, 'samples': 18681344, 'steps': 36486, 'loss/train': 2.4480807781219482} +03/05/2022 08:21:21 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/05/2022 08:21:26 - INFO - codeparrot_training - Step 36487: {'lr': 0.0004359506417547876, 'samples': 18681856, 'steps': 36487, 'loss/train': 1.8371868133544922} +03/05/2022 08:21:29 - INFO - codeparrot_training - Step 36488: {'lr': 0.000435947094688223, 'samples': 18682368, 'steps': 36488, 'loss/train': 1.5153156518936157} +03/05/2022 08:21:29 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/05/2022 08:21:34 - INFO - codeparrot_training - Step 36489: {'lr': 0.0004359435475378735, 'samples': 18682880, 'steps': 36489, 'loss/train': 1.9459608793258667} +03/05/2022 08:21:37 - INFO - codeparrot_training - Step 36490: {'lr': 0.0004359400003037406, 'samples': 18683392, 'steps': 36490, 'loss/train': 1.829288363456726} +03/05/2022 08:21:37 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/05/2022 08:21:42 - INFO - codeparrot_training - Step 36491: {'lr': 0.0004359364529858261, 'samples': 18683904, 'steps': 36491, 'loss/train': 0.6640675663948059} +03/05/2022 08:21:46 - INFO - codeparrot_training - Step 36492: {'lr': 0.00043593290558413143, 'samples': 18684416, 'steps': 36492, 'loss/train': 1.7837222814559937} +03/05/2022 08:21:46 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/05/2022 08:21:51 - INFO - codeparrot_training - Step 36493: {'lr': 0.0004359293580986583, 'samples': 18684928, 'steps': 36493, 'loss/train': 1.5377029180526733} +03/05/2022 08:21:54 - INFO - codeparrot_training - Step 36494: {'lr': 0.0004359258105294083, 'samples': 18685440, 'steps': 36494, 'loss/train': 1.5339992046356201} +03/05/2022 08:21:54 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/05/2022 08:21:59 - INFO - codeparrot_training - Step 36495: {'lr': 0.0004359222628763829, 'samples': 18685952, 'steps': 36495, 'loss/train': 1.7072575092315674} +03/05/2022 08:22:02 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 08:22:04 - INFO - codeparrot_training - Step 36496: {'lr': 0.0004359187151395839, 'samples': 18686464, 'steps': 36496, 'loss/train': 1.6416996717453003} +03/05/2022 08:22:08 - INFO - codeparrot_training - Step 36497: {'lr': 0.0004359151673190127, 'samples': 18686976, 'steps': 36497, 'loss/train': 2.264662742614746} +03/05/2022 08:22:11 - INFO - codeparrot_training - Step 36498: {'lr': 0.0004359116194146711, 'samples': 18687488, 'steps': 36498, 'loss/train': 1.6730828285217285} +03/05/2022 08:22:11 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 08:22:16 - INFO - codeparrot_training - Step 36499: {'lr': 0.0004359080714265605, 'samples': 18688000, 'steps': 36499, 'loss/train': 0.8470184803009033} +03/05/2022 08:22:19 - INFO - codeparrot_training - Step 36500: {'lr': 0.00043590452335468265, 'samples': 18688512, 'steps': 36500, 'loss/train': 1.1176906824111938} +03/05/2022 08:22:19 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/05/2022 08:22:24 - INFO - codeparrot_training - Step 36501: {'lr': 0.00043590097519903917, 'samples': 18689024, 'steps': 36501, 'loss/train': 1.8993022441864014} +03/05/2022 08:22:27 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/05/2022 08:22:30 - INFO - codeparrot_training - Step 36502: {'lr': 0.0004358974269596314, 'samples': 18689536, 'steps': 36502, 'loss/train': 0.9637288451194763} +03/05/2022 08:22:33 - INFO - codeparrot_training - Step 36503: {'lr': 0.00043589387863646125, 'samples': 18690048, 'steps': 36503, 'loss/train': 1.8415104150772095} +03/05/2022 08:22:36 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 08:22:38 - INFO - codeparrot_training - Step 36504: {'lr': 0.0004358903302295301, 'samples': 18690560, 'steps': 36504, 'loss/train': 1.1889461278915405} +03/05/2022 08:22:41 - INFO - codeparrot_training - Step 36505: {'lr': 0.0004358867817388397, 'samples': 18691072, 'steps': 36505, 'loss/train': 1.4950592517852783} +03/05/2022 08:22:44 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/05/2022 08:22:47 - INFO - codeparrot_training - Step 36506: {'lr': 0.0004358832331643916, 'samples': 18691584, 'steps': 36506, 'loss/train': 1.5029492378234863} +03/05/2022 08:22:50 - INFO - codeparrot_training - Step 36507: {'lr': 0.0004358796845061873, 'samples': 18692096, 'steps': 36507, 'loss/train': 1.5153765678405762} +03/05/2022 08:22:52 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 08:22:55 - INFO - codeparrot_training - Step 36508: {'lr': 0.00043587613576422855, 'samples': 18692608, 'steps': 36508, 'loss/train': 1.9874777793884277} +03/05/2022 08:22:58 - INFO - codeparrot_training - Step 36509: {'lr': 0.00043587258693851685, 'samples': 18693120, 'steps': 36509, 'loss/train': 1.3859539031982422} +03/05/2022 08:23:01 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/05/2022 08:23:03 - INFO - codeparrot_training - Step 36510: {'lr': 0.0004358690380290539, 'samples': 18693632, 'steps': 36510, 'loss/train': 1.749043583869934} +03/05/2022 08:23:07 - INFO - codeparrot_training - Step 36511: {'lr': 0.00043586548903584113, 'samples': 18694144, 'steps': 36511, 'loss/train': 1.6331743001937866} +03/05/2022 08:23:09 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/05/2022 08:23:12 - INFO - codeparrot_training - Step 36512: {'lr': 0.0004358619399588802, 'samples': 18694656, 'steps': 36512, 'loss/train': 1.7348130941390991} +03/05/2022 08:23:15 - INFO - codeparrot_training - Step 36513: {'lr': 0.0004358583907981729, 'samples': 18695168, 'steps': 36513, 'loss/train': 1.2049554586410522} +03/05/2022 08:23:18 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/05/2022 08:23:20 - INFO - codeparrot_training - Step 36514: {'lr': 0.0004358548415537206, 'samples': 18695680, 'steps': 36514, 'loss/train': 1.5076621770858765} +03/05/2022 08:23:24 - INFO - codeparrot_training - Step 36515: {'lr': 0.000435851292225525, 'samples': 18696192, 'steps': 36515, 'loss/train': 1.8108024597167969} +03/05/2022 08:23:26 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 08:23:29 - INFO - codeparrot_training - Step 36516: {'lr': 0.0004358477428135876, 'samples': 18696704, 'steps': 36516, 'loss/train': 1.4344950914382935} +03/05/2022 08:23:32 - INFO - codeparrot_training - Step 36517: {'lr': 0.00043584419331791014, 'samples': 18697216, 'steps': 36517, 'loss/train': 1.6297340393066406} +03/05/2022 08:23:34 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/05/2022 08:23:37 - INFO - codeparrot_training - Step 36518: {'lr': 0.0004358406437384942, 'samples': 18697728, 'steps': 36518, 'loss/train': 1.5512477159500122} +03/05/2022 08:23:40 - INFO - codeparrot_training - Step 36519: {'lr': 0.0004358370940753412, 'samples': 18698240, 'steps': 36519, 'loss/train': 2.119971990585327} +03/05/2022 08:23:43 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/05/2022 08:23:46 - INFO - codeparrot_training - Step 36520: {'lr': 0.000435833544328453, 'samples': 18698752, 'steps': 36520, 'loss/train': 0.6191155314445496} +03/05/2022 08:23:49 - INFO - codeparrot_training - Step 36521: {'lr': 0.00043582999449783103, 'samples': 18699264, 'steps': 36521, 'loss/train': 2.4470598697662354} +03/05/2022 08:23:52 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/05/2022 08:23:54 - INFO - codeparrot_training - Step 36522: {'lr': 0.0004358264445834769, 'samples': 18699776, 'steps': 36522, 'loss/train': 2.3127388954162598} +03/05/2022 08:23:58 - INFO - codeparrot_training - Step 36523: {'lr': 0.00043582289458539224, 'samples': 18700288, 'steps': 36523, 'loss/train': 2.3628015518188477} +03/05/2022 08:24:00 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/05/2022 08:24:03 - INFO - codeparrot_training - Step 36524: {'lr': 0.00043581934450357876, 'samples': 18700800, 'steps': 36524, 'loss/train': 1.8854731321334839} +03/05/2022 08:24:06 - INFO - codeparrot_training - Step 36525: {'lr': 0.0004358157943380379, 'samples': 18701312, 'steps': 36525, 'loss/train': 1.24105703830719} +03/05/2022 08:24:08 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/05/2022 08:24:11 - INFO - codeparrot_training - Step 36526: {'lr': 0.00043581224408877116, 'samples': 18701824, 'steps': 36526, 'loss/train': 1.6562447547912598} +03/05/2022 08:24:14 - INFO - codeparrot_training - Step 36527: {'lr': 0.00043580869375578046, 'samples': 18702336, 'steps': 36527, 'loss/train': 1.2891017198562622} +03/05/2022 08:24:17 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/05/2022 08:24:20 - INFO - codeparrot_training - Step 36528: {'lr': 0.00043580514333906717, 'samples': 18702848, 'steps': 36528, 'loss/train': 0.8692784309387207} +03/05/2022 08:24:23 - INFO - codeparrot_training - Step 36529: {'lr': 0.000435801592838633, 'samples': 18703360, 'steps': 36529, 'loss/train': 1.0447176694869995} +03/05/2022 08:24:25 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/05/2022 08:24:28 - INFO - codeparrot_training - Step 36530: {'lr': 0.0004357980422544794, 'samples': 18703872, 'steps': 36530, 'loss/train': 1.8964654207229614} +03/05/2022 08:24:31 - INFO - codeparrot_training - Step 36531: {'lr': 0.00043579449158660815, 'samples': 18704384, 'steps': 36531, 'loss/train': 2.418860673904419} +03/05/2022 08:24:33 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/05/2022 08:24:37 - INFO - codeparrot_training - Step 36532: {'lr': 0.0004357909408350208, 'samples': 18704896, 'steps': 36532, 'loss/train': 2.0499579906463623} +03/05/2022 08:24:40 - INFO - codeparrot_training - Step 36533: {'lr': 0.00043578738999971886, 'samples': 18705408, 'steps': 36533, 'loss/train': 2.485102891921997} +03/05/2022 08:24:42 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 08:24:45 - INFO - codeparrot_training - Step 36534: {'lr': 0.000435783839080704, 'samples': 18705920, 'steps': 36534, 'loss/train': 1.6127310991287231} +03/05/2022 08:24:48 - INFO - codeparrot_training - Step 36535: {'lr': 0.00043578028807797774, 'samples': 18706432, 'steps': 36535, 'loss/train': 1.5184998512268066} +03/05/2022 08:24:50 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/05/2022 08:24:54 - INFO - codeparrot_training - Step 36536: {'lr': 0.0004357767369915419, 'samples': 18706944, 'steps': 36536, 'loss/train': 2.1432178020477295} +03/05/2022 08:24:57 - INFO - codeparrot_training - Step 36537: {'lr': 0.0004357731858213978, 'samples': 18707456, 'steps': 36537, 'loss/train': 0.7013729810714722} +03/05/2022 08:24:59 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/05/2022 08:25:02 - INFO - codeparrot_training - Step 36538: {'lr': 0.0004357696345675472, 'samples': 18707968, 'steps': 36538, 'loss/train': 1.1529698371887207} +03/05/2022 08:25:05 - INFO - codeparrot_training - Step 36539: {'lr': 0.00043576608322999167, 'samples': 18708480, 'steps': 36539, 'loss/train': 0.6617556810379028} +03/05/2022 08:25:07 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/05/2022 08:25:11 - INFO - codeparrot_training - Step 36540: {'lr': 0.0004357625318087328, 'samples': 18708992, 'steps': 36540, 'loss/train': 1.403743028640747} +03/05/2022 08:25:14 - INFO - codeparrot_training - Step 36541: {'lr': 0.00043575898030377225, 'samples': 18709504, 'steps': 36541, 'loss/train': 1.7805424928665161} +03/05/2022 08:25:15 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 08:25:19 - INFO - codeparrot_training - Step 36542: {'lr': 0.00043575542871511155, 'samples': 18710016, 'steps': 36542, 'loss/train': 1.4902095794677734} +03/05/2022 08:25:22 - INFO - codeparrot_training - Step 36543: {'lr': 0.00043575187704275234, 'samples': 18710528, 'steps': 36543, 'loss/train': 1.4140548706054688} +03/05/2022 08:25:24 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/05/2022 08:25:28 - INFO - codeparrot_training - Step 36544: {'lr': 0.0004357483252866961, 'samples': 18711040, 'steps': 36544, 'loss/train': 1.5703848600387573} +03/05/2022 08:25:31 - INFO - codeparrot_training - Step 36545: {'lr': 0.00043574477344694463, 'samples': 18711552, 'steps': 36545, 'loss/train': 1.5440270900726318} +03/05/2022 08:25:33 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 08:25:36 - INFO - codeparrot_training - Step 36546: {'lr': 0.0004357412215234994, 'samples': 18712064, 'steps': 36546, 'loss/train': 1.915827751159668} +03/05/2022 08:25:39 - INFO - codeparrot_training - Step 36547: {'lr': 0.00043573766951636206, 'samples': 18712576, 'steps': 36547, 'loss/train': 1.4978708028793335} +03/05/2022 08:25:41 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/05/2022 08:25:45 - INFO - codeparrot_training - Step 36548: {'lr': 0.00043573411742553415, 'samples': 18713088, 'steps': 36548, 'loss/train': 1.4757479429244995} +03/05/2022 08:25:48 - INFO - codeparrot_training - Step 36549: {'lr': 0.0004357305652510174, 'samples': 18713600, 'steps': 36549, 'loss/train': 1.2901889085769653} +03/05/2022 08:25:49 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 08:25:53 - INFO - codeparrot_training - Step 36550: {'lr': 0.00043572701299281327, 'samples': 18714112, 'steps': 36550, 'loss/train': 1.7883002758026123} +03/05/2022 08:25:56 - INFO - codeparrot_training - Step 36551: {'lr': 0.0004357234606509234, 'samples': 18714624, 'steps': 36551, 'loss/train': 1.737593650817871} +03/05/2022 08:25:58 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/05/2022 08:26:02 - INFO - codeparrot_training - Step 36552: {'lr': 0.00043571990822534936, 'samples': 18715136, 'steps': 36552, 'loss/train': 2.0328991413116455} +03/05/2022 08:26:05 - INFO - codeparrot_training - Step 36553: {'lr': 0.00043571635571609287, 'samples': 18715648, 'steps': 36553, 'loss/train': 1.2090493440628052} +03/05/2022 08:26:06 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/05/2022 08:26:10 - INFO - codeparrot_training - Step 36554: {'lr': 0.00043571280312315543, 'samples': 18716160, 'steps': 36554, 'loss/train': 1.7943819761276245} +03/05/2022 08:26:13 - INFO - codeparrot_training - Step 36555: {'lr': 0.0004357092504465386, 'samples': 18716672, 'steps': 36555, 'loss/train': 2.533794641494751} +03/05/2022 08:26:14 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/05/2022 08:26:18 - INFO - codeparrot_training - Step 36556: {'lr': 0.00043570569768624416, 'samples': 18717184, 'steps': 36556, 'loss/train': 2.2103922367095947} +03/05/2022 08:26:22 - INFO - codeparrot_training - Step 36557: {'lr': 0.00043570214484227353, 'samples': 18717696, 'steps': 36557, 'loss/train': 1.3598906993865967} +03/05/2022 08:26:22 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 08:26:27 - INFO - codeparrot_training - Step 36558: {'lr': 0.00043569859191462847, 'samples': 18718208, 'steps': 36558, 'loss/train': 2.7311465740203857} +03/05/2022 08:26:30 - INFO - codeparrot_training - Step 36559: {'lr': 0.0004356950389033104, 'samples': 18718720, 'steps': 36559, 'loss/train': 1.8683820962905884} +03/05/2022 08:26:31 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 08:26:35 - INFO - codeparrot_training - Step 36560: {'lr': 0.0004356914858083211, 'samples': 18719232, 'steps': 36560, 'loss/train': 1.4868749380111694} +03/05/2022 08:26:38 - INFO - codeparrot_training - Step 36561: {'lr': 0.00043568793262966195, 'samples': 18719744, 'steps': 36561, 'loss/train': 2.323687791824341} +03/05/2022 08:26:39 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/05/2022 08:26:44 - INFO - codeparrot_training - Step 36562: {'lr': 0.00043568437936733473, 'samples': 18720256, 'steps': 36562, 'loss/train': 1.4781105518341064} +03/05/2022 08:26:47 - INFO - codeparrot_training - Step 36563: {'lr': 0.0004356808260213411, 'samples': 18720768, 'steps': 36563, 'loss/train': 1.5390191078186035} +03/05/2022 08:26:49 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 08:26:52 - INFO - codeparrot_training - Step 36564: {'lr': 0.00043567727259168244, 'samples': 18721280, 'steps': 36564, 'loss/train': 1.2240735292434692} +03/05/2022 08:26:56 - INFO - codeparrot_training - Step 36565: {'lr': 0.0004356737190783605, 'samples': 18721792, 'steps': 36565, 'loss/train': 1.2456129789352417} +03/05/2022 08:26:58 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 08:27:01 - INFO - codeparrot_training - Step 36566: {'lr': 0.00043567016548137685, 'samples': 18722304, 'steps': 36566, 'loss/train': 2.0442705154418945} +03/05/2022 08:27:04 - INFO - codeparrot_training - Step 36567: {'lr': 0.00043566661180073304, 'samples': 18722816, 'steps': 36567, 'loss/train': 1.5831828117370605} +03/05/2022 08:27:06 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/05/2022 08:27:09 - INFO - codeparrot_training - Step 36568: {'lr': 0.00043566305803643073, 'samples': 18723328, 'steps': 36568, 'loss/train': 1.5288282632827759} +03/05/2022 08:27:12 - INFO - codeparrot_training - Step 36569: {'lr': 0.00043565950418847154, 'samples': 18723840, 'steps': 36569, 'loss/train': 0.8193228840827942} +03/05/2022 08:27:14 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 08:27:18 - INFO - codeparrot_training - Step 36570: {'lr': 0.00043565595025685705, 'samples': 18724352, 'steps': 36570, 'loss/train': 2.4879138469696045} +03/05/2022 08:27:21 - INFO - codeparrot_training - Step 36571: {'lr': 0.0004356523962415889, 'samples': 18724864, 'steps': 36571, 'loss/train': 1.8944706916809082} +03/05/2022 08:27:23 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/05/2022 08:27:26 - INFO - codeparrot_training - Step 36572: {'lr': 0.00043564884214266855, 'samples': 18725376, 'steps': 36572, 'loss/train': 1.8711698055267334} +03/05/2022 08:27:29 - INFO - codeparrot_training - Step 36573: {'lr': 0.00043564528796009774, 'samples': 18725888, 'steps': 36573, 'loss/train': 1.1864264011383057} +03/05/2022 08:27:31 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/05/2022 08:27:35 - INFO - codeparrot_training - Step 36574: {'lr': 0.00043564173369387807, 'samples': 18726400, 'steps': 36574, 'loss/train': 2.058751344680786} +03/05/2022 08:27:38 - INFO - codeparrot_training - Step 36575: {'lr': 0.00043563817934401107, 'samples': 18726912, 'steps': 36575, 'loss/train': 1.9796102046966553} +03/05/2022 08:27:39 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/05/2022 08:27:43 - INFO - codeparrot_training - Step 36576: {'lr': 0.0004356346249104983, 'samples': 18727424, 'steps': 36576, 'loss/train': 1.809995174407959} +03/05/2022 08:27:46 - INFO - codeparrot_training - Step 36577: {'lr': 0.0004356310703933415, 'samples': 18727936, 'steps': 36577, 'loss/train': 1.9293798208236694} +03/05/2022 08:27:48 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 08:27:51 - INFO - codeparrot_training - Step 36578: {'lr': 0.00043562751579254215, 'samples': 18728448, 'steps': 36578, 'loss/train': 1.7188117504119873} +03/05/2022 08:27:54 - INFO - codeparrot_training - Step 36579: {'lr': 0.00043562396110810196, 'samples': 18728960, 'steps': 36579, 'loss/train': 1.7587697505950928} +03/05/2022 08:27:56 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 08:28:00 - INFO - codeparrot_training - Step 36580: {'lr': 0.00043562040634002245, 'samples': 18729472, 'steps': 36580, 'loss/train': 1.9747369289398193} +03/05/2022 08:28:03 - INFO - codeparrot_training - Step 36581: {'lr': 0.0004356168514883053, 'samples': 18729984, 'steps': 36581, 'loss/train': 2.058427333831787} +03/05/2022 08:28:05 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 08:28:08 - INFO - codeparrot_training - Step 36582: {'lr': 0.000435613296552952, 'samples': 18730496, 'steps': 36582, 'loss/train': 1.7490038871765137} +03/05/2022 08:28:12 - INFO - codeparrot_training - Step 36583: {'lr': 0.0004356097415339643, 'samples': 18731008, 'steps': 36583, 'loss/train': 1.5420913696289062} +03/05/2022 08:28:14 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/05/2022 08:28:17 - INFO - codeparrot_training - Step 36584: {'lr': 0.0004356061864313436, 'samples': 18731520, 'steps': 36584, 'loss/train': 1.319744348526001} +03/05/2022 08:28:20 - INFO - codeparrot_training - Step 36585: {'lr': 0.0004356026312450917, 'samples': 18732032, 'steps': 36585, 'loss/train': 2.2613534927368164} +03/05/2022 08:28:23 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/05/2022 08:28:25 - INFO - codeparrot_training - Step 36586: {'lr': 0.00043559907597521007, 'samples': 18732544, 'steps': 36586, 'loss/train': 2.1069462299346924} +03/05/2022 08:28:28 - INFO - codeparrot_training - Step 36587: {'lr': 0.00043559552062170037, 'samples': 18733056, 'steps': 36587, 'loss/train': 2.013767719268799} +03/05/2022 08:28:31 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/05/2022 08:28:34 - INFO - codeparrot_training - Step 36588: {'lr': 0.00043559196518456425, 'samples': 18733568, 'steps': 36588, 'loss/train': 1.4833972454071045} +03/05/2022 08:28:37 - INFO - codeparrot_training - Step 36589: {'lr': 0.0004355884096638032, 'samples': 18734080, 'steps': 36589, 'loss/train': 2.004408359527588} +03/05/2022 08:28:39 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/05/2022 08:28:42 - INFO - codeparrot_training - Step 36590: {'lr': 0.0004355848540594188, 'samples': 18734592, 'steps': 36590, 'loss/train': 2.231391191482544} +03/05/2022 08:28:45 - INFO - codeparrot_training - Step 36591: {'lr': 0.00043558129837141285, 'samples': 18735104, 'steps': 36591, 'loss/train': 1.957297921180725} +03/05/2022 08:28:48 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 08:28:50 - INFO - codeparrot_training - Step 36592: {'lr': 0.0004355777425997868, 'samples': 18735616, 'steps': 36592, 'loss/train': 1.3269720077514648} +03/05/2022 08:28:54 - INFO - codeparrot_training - Step 36593: {'lr': 0.0004355741867445423, 'samples': 18736128, 'steps': 36593, 'loss/train': 1.8008546829223633} +03/05/2022 08:28:56 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/05/2022 08:28:59 - INFO - codeparrot_training - Step 36594: {'lr': 0.00043557063080568094, 'samples': 18736640, 'steps': 36594, 'loss/train': 1.4983978271484375} +03/05/2022 08:29:02 - INFO - codeparrot_training - Step 36595: {'lr': 0.00043556707478320425, 'samples': 18737152, 'steps': 36595, 'loss/train': 2.248431921005249} +03/05/2022 08:29:04 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/05/2022 08:29:07 - INFO - codeparrot_training - Step 36596: {'lr': 0.000435563518677114, 'samples': 18737664, 'steps': 36596, 'loss/train': 1.5486775636672974} +03/05/2022 08:29:11 - INFO - codeparrot_training - Step 36597: {'lr': 0.00043555996248741157, 'samples': 18738176, 'steps': 36597, 'loss/train': 1.726602554321289} +03/05/2022 08:29:12 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 08:29:16 - INFO - codeparrot_training - Step 36598: {'lr': 0.00043555640621409874, 'samples': 18738688, 'steps': 36598, 'loss/train': 1.7763574123382568} +03/05/2022 08:29:19 - INFO - codeparrot_training - Step 36599: {'lr': 0.000435552849857177, 'samples': 18739200, 'steps': 36599, 'loss/train': 1.971351146697998} +03/05/2022 08:29:21 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 08:29:24 - INFO - codeparrot_training - Step 36600: {'lr': 0.0004355492934166481, 'samples': 18739712, 'steps': 36600, 'loss/train': 1.71782386302948} +03/05/2022 08:29:27 - INFO - codeparrot_training - Step 36601: {'lr': 0.00043554573689251355, 'samples': 18740224, 'steps': 36601, 'loss/train': 0.5695029497146606} +03/05/2022 08:29:30 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 08:29:33 - INFO - codeparrot_training - Step 36602: {'lr': 0.00043554218028477493, 'samples': 18740736, 'steps': 36602, 'loss/train': 1.830383539199829} +03/05/2022 08:29:36 - INFO - codeparrot_training - Step 36603: {'lr': 0.0004355386235934339, 'samples': 18741248, 'steps': 36603, 'loss/train': 1.4319417476654053} +03/05/2022 08:29:38 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/05/2022 08:29:41 - INFO - codeparrot_training - Step 36604: {'lr': 0.0004355350668184919, 'samples': 18741760, 'steps': 36604, 'loss/train': 2.0565261840820312} +03/05/2022 08:29:44 - INFO - codeparrot_training - Step 36605: {'lr': 0.0004355315099599508, 'samples': 18742272, 'steps': 36605, 'loss/train': 1.1882394552230835} +03/05/2022 08:29:46 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/05/2022 08:29:49 - INFO - codeparrot_training - Step 36606: {'lr': 0.000435527953017812, 'samples': 18742784, 'steps': 36606, 'loss/train': 0.7650132179260254} +03/05/2022 08:29:53 - INFO - codeparrot_training - Step 36607: {'lr': 0.00043552439599207714, 'samples': 18743296, 'steps': 36607, 'loss/train': 1.7250149250030518} +03/05/2022 08:29:55 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 08:29:58 - INFO - codeparrot_training - Step 36608: {'lr': 0.00043552083888274794, 'samples': 18743808, 'steps': 36608, 'loss/train': 1.855858564376831} +03/05/2022 08:30:01 - INFO - codeparrot_training - Step 36609: {'lr': 0.00043551728168982583, 'samples': 18744320, 'steps': 36609, 'loss/train': 1.6502448320388794} +03/05/2022 08:30:03 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/05/2022 08:30:06 - INFO - codeparrot_training - Step 36610: {'lr': 0.0004355137244133126, 'samples': 18744832, 'steps': 36610, 'loss/train': 0.7300519347190857} +03/05/2022 08:30:10 - INFO - codeparrot_training - Step 36611: {'lr': 0.00043551016705320965, 'samples': 18745344, 'steps': 36611, 'loss/train': 1.4427647590637207} +03/05/2022 08:30:11 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/05/2022 08:30:15 - INFO - codeparrot_training - Step 36612: {'lr': 0.00043550660960951874, 'samples': 18745856, 'steps': 36612, 'loss/train': 2.145141363143921} +03/05/2022 08:30:18 - INFO - codeparrot_training - Step 36613: {'lr': 0.0004355030520822414, 'samples': 18746368, 'steps': 36613, 'loss/train': 1.865956425666809} +03/05/2022 08:30:20 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/05/2022 08:30:23 - INFO - codeparrot_training - Step 36614: {'lr': 0.00043549949447137915, 'samples': 18746880, 'steps': 36614, 'loss/train': 1.9670602083206177} +03/05/2022 08:30:27 - INFO - codeparrot_training - Step 36615: {'lr': 0.00043549593677693385, 'samples': 18747392, 'steps': 36615, 'loss/train': 0.4524717926979065} +03/05/2022 08:30:28 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 08:30:32 - INFO - codeparrot_training - Step 36616: {'lr': 0.0004354923789989068, 'samples': 18747904, 'steps': 36616, 'loss/train': 2.0643012523651123} +03/05/2022 08:30:35 - INFO - codeparrot_training - Step 36617: {'lr': 0.0004354888211372998, 'samples': 18748416, 'steps': 36617, 'loss/train': 1.5244133472442627} +03/05/2022 08:30:37 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/05/2022 08:30:40 - INFO - codeparrot_training - Step 36618: {'lr': 0.0004354852631921145, 'samples': 18748928, 'steps': 36618, 'loss/train': 2.306825876235962} +03/05/2022 08:30:43 - INFO - codeparrot_training - Step 36619: {'lr': 0.0004354817051633523, 'samples': 18749440, 'steps': 36619, 'loss/train': 1.7672079801559448} +03/05/2022 08:30:45 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 08:30:49 - INFO - codeparrot_training - Step 36620: {'lr': 0.00043547814705101486, 'samples': 18749952, 'steps': 36620, 'loss/train': 1.7525672912597656} +03/05/2022 08:30:52 - INFO - codeparrot_training - Step 36621: {'lr': 0.00043547458885510393, 'samples': 18750464, 'steps': 36621, 'loss/train': 2.43685245513916} +03/05/2022 08:30:53 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/05/2022 08:30:57 - INFO - codeparrot_training - Step 36622: {'lr': 0.00043547103057562097, 'samples': 18750976, 'steps': 36622, 'loss/train': 2.3709559440612793} +03/05/2022 08:31:00 - INFO - codeparrot_training - Step 36623: {'lr': 0.00043546747221256764, 'samples': 18751488, 'steps': 36623, 'loss/train': 2.236823797225952} +03/05/2022 08:31:02 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 08:31:06 - INFO - codeparrot_training - Step 36624: {'lr': 0.00043546391376594553, 'samples': 18752000, 'steps': 36624, 'loss/train': 0.8149133920669556} +03/05/2022 08:31:09 - INFO - codeparrot_training - Step 36625: {'lr': 0.0004354603552357562, 'samples': 18752512, 'steps': 36625, 'loss/train': 2.5442864894866943} +03/05/2022 08:31:10 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/05/2022 08:31:14 - INFO - codeparrot_training - Step 36626: {'lr': 0.0004354567966220013, 'samples': 18753024, 'steps': 36626, 'loss/train': 1.8254780769348145} +03/05/2022 08:31:17 - INFO - codeparrot_training - Step 36627: {'lr': 0.0004354532379246825, 'samples': 18753536, 'steps': 36627, 'loss/train': 2.0811667442321777} +03/05/2022 08:31:19 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/05/2022 08:31:22 - INFO - codeparrot_training - Step 36628: {'lr': 0.0004354496791438013, 'samples': 18754048, 'steps': 36628, 'loss/train': 1.9770479202270508} +03/05/2022 08:31:26 - INFO - codeparrot_training - Step 36629: {'lr': 0.0004354461202793593, 'samples': 18754560, 'steps': 36629, 'loss/train': 1.8774384260177612} +03/05/2022 08:31:27 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/05/2022 08:31:31 - INFO - codeparrot_training - Step 36630: {'lr': 0.00043544256133135815, 'samples': 18755072, 'steps': 36630, 'loss/train': 1.4496067762374878} +03/05/2022 08:31:34 - INFO - codeparrot_training - Step 36631: {'lr': 0.0004354390022997995, 'samples': 18755584, 'steps': 36631, 'loss/train': 1.600720763206482} +03/05/2022 08:31:36 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 08:31:39 - INFO - codeparrot_training - Step 36632: {'lr': 0.0004354354431846848, 'samples': 18756096, 'steps': 36632, 'loss/train': 1.5415774583816528} +03/05/2022 08:31:42 - INFO - codeparrot_training - Step 36633: {'lr': 0.00043543188398601586, 'samples': 18756608, 'steps': 36633, 'loss/train': 2.1242449283599854} +03/05/2022 08:31:44 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/05/2022 08:31:48 - INFO - codeparrot_training - Step 36634: {'lr': 0.00043542832470379415, 'samples': 18757120, 'steps': 36634, 'loss/train': 1.3364808559417725} +03/05/2022 08:31:51 - INFO - codeparrot_training - Step 36635: {'lr': 0.0004354247653380212, 'samples': 18757632, 'steps': 36635, 'loss/train': 1.8590220212936401} +03/05/2022 08:31:52 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 08:31:56 - INFO - codeparrot_training - Step 36636: {'lr': 0.00043542120588869885, 'samples': 18758144, 'steps': 36636, 'loss/train': 2.547084093093872} +03/05/2022 08:31:59 - INFO - codeparrot_training - Step 36637: {'lr': 0.0004354176463558284, 'samples': 18758656, 'steps': 36637, 'loss/train': 1.783582329750061} +03/05/2022 08:32:01 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/05/2022 08:32:05 - INFO - codeparrot_training - Step 36638: {'lr': 0.00043541408673941173, 'samples': 18759168, 'steps': 36638, 'loss/train': 0.7238414287567139} +03/05/2022 08:32:08 - INFO - codeparrot_training - Step 36639: {'lr': 0.00043541052703945034, 'samples': 18759680, 'steps': 36639, 'loss/train': 2.013413190841675} +03/05/2022 08:32:09 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 08:32:13 - INFO - codeparrot_training - Step 36640: {'lr': 0.0004354069672559458, 'samples': 18760192, 'steps': 36640, 'loss/train': 1.586073398590088} +03/05/2022 08:32:16 - INFO - codeparrot_training - Step 36641: {'lr': 0.0004354034073888997, 'samples': 18760704, 'steps': 36641, 'loss/train': 2.944774866104126} +03/05/2022 08:32:19 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/05/2022 08:32:22 - INFO - codeparrot_training - Step 36642: {'lr': 0.00043539984743831375, 'samples': 18761216, 'steps': 36642, 'loss/train': 1.4330339431762695} +03/05/2022 08:32:25 - INFO - codeparrot_training - Step 36643: {'lr': 0.0004353962874041895, 'samples': 18761728, 'steps': 36643, 'loss/train': 2.499502182006836} +03/05/2022 08:32:28 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/05/2022 08:32:30 - INFO - codeparrot_training - Step 36644: {'lr': 0.0004353927272865285, 'samples': 18762240, 'steps': 36644, 'loss/train': 1.3408678770065308} +03/05/2022 08:32:33 - INFO - codeparrot_training - Step 36645: {'lr': 0.0004353891670853324, 'samples': 18762752, 'steps': 36645, 'loss/train': 2.179685592651367} +03/05/2022 08:32:36 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/05/2022 08:32:39 - INFO - codeparrot_training - Step 36646: {'lr': 0.00043538560680060287, 'samples': 18763264, 'steps': 36646, 'loss/train': 1.1304265260696411} +03/05/2022 08:32:42 - INFO - codeparrot_training - Step 36647: {'lr': 0.00043538204643234137, 'samples': 18763776, 'steps': 36647, 'loss/train': 1.6504698991775513} +03/05/2022 08:32:44 - INFO - codeparrot_training - Skipping example with length 917 (seq_length=1024) +03/05/2022 08:32:47 - INFO - codeparrot_training - Step 36648: {'lr': 0.0004353784859805496, 'samples': 18764288, 'steps': 36648, 'loss/train': 1.3401870727539062} +03/05/2022 08:32:50 - INFO - codeparrot_training - Step 36649: {'lr': 0.00043537492544522917, 'samples': 18764800, 'steps': 36649, 'loss/train': 1.0732760429382324} +03/05/2022 08:32:53 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 08:32:56 - INFO - codeparrot_training - Step 36650: {'lr': 0.0004353713648263816, 'samples': 18765312, 'steps': 36650, 'loss/train': 1.5390355587005615} +03/05/2022 08:32:59 - INFO - codeparrot_training - Step 36651: {'lr': 0.00043536780412400857, 'samples': 18765824, 'steps': 36651, 'loss/train': 1.8621513843536377} +03/05/2022 08:33:02 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/05/2022 08:33:04 - INFO - codeparrot_training - Step 36652: {'lr': 0.0004353642433381117, 'samples': 18766336, 'steps': 36652, 'loss/train': 2.1783015727996826} +03/05/2022 08:33:07 - INFO - codeparrot_training - Step 36653: {'lr': 0.00043536068246869254, 'samples': 18766848, 'steps': 36653, 'loss/train': 1.9883053302764893} +03/05/2022 08:33:10 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/05/2022 08:33:13 - INFO - codeparrot_training - Step 36654: {'lr': 0.00043535712151575274, 'samples': 18767360, 'steps': 36654, 'loss/train': 1.2596718072891235} +03/05/2022 08:33:16 - INFO - codeparrot_training - Step 36655: {'lr': 0.00043535356047929387, 'samples': 18767872, 'steps': 36655, 'loss/train': 2.145206928253174} +03/05/2022 08:33:18 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/05/2022 08:33:21 - INFO - codeparrot_training - Step 36656: {'lr': 0.0004353499993593176, 'samples': 18768384, 'steps': 36656, 'loss/train': 1.0523627996444702} +03/05/2022 08:33:24 - INFO - codeparrot_training - Step 36657: {'lr': 0.0004353464381558254, 'samples': 18768896, 'steps': 36657, 'loss/train': 1.7637388706207275} +03/05/2022 08:33:26 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/05/2022 08:33:29 - INFO - codeparrot_training - Step 36658: {'lr': 0.00043534287686881895, 'samples': 18769408, 'steps': 36658, 'loss/train': 1.6951905488967896} +03/05/2022 08:33:33 - INFO - codeparrot_training - Step 36659: {'lr': 0.00043533931549829993, 'samples': 18769920, 'steps': 36659, 'loss/train': 2.221018075942993} +03/05/2022 08:33:35 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 08:33:38 - INFO - codeparrot_training - Step 36660: {'lr': 0.00043533575404426986, 'samples': 18770432, 'steps': 36660, 'loss/train': 2.1294572353363037} +03/05/2022 08:33:41 - INFO - codeparrot_training - Step 36661: {'lr': 0.0004353321925067303, 'samples': 18770944, 'steps': 36661, 'loss/train': 1.4282238483428955} +03/05/2022 08:33:43 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 08:33:46 - INFO - codeparrot_training - Step 36662: {'lr': 0.0004353286308856829, 'samples': 18771456, 'steps': 36662, 'loss/train': 1.5797237157821655} +03/05/2022 08:33:49 - INFO - codeparrot_training - Step 36663: {'lr': 0.00043532506918112933, 'samples': 18771968, 'steps': 36663, 'loss/train': 1.6038144826889038} +03/05/2022 08:33:51 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 08:33:55 - INFO - codeparrot_training - Step 36664: {'lr': 0.0004353215073930712, 'samples': 18772480, 'steps': 36664, 'loss/train': 0.9530586004257202} +03/05/2022 08:33:58 - INFO - codeparrot_training - Step 36665: {'lr': 0.00043531794552150994, 'samples': 18772992, 'steps': 36665, 'loss/train': 2.021253824234009} +03/05/2022 08:34:00 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 08:34:03 - INFO - codeparrot_training - Step 36666: {'lr': 0.0004353143835664474, 'samples': 18773504, 'steps': 36666, 'loss/train': 2.064541816711426} +03/05/2022 08:34:06 - INFO - codeparrot_training - Step 36667: {'lr': 0.00043531082152788495, 'samples': 18774016, 'steps': 36667, 'loss/train': 1.6886372566223145} +03/05/2022 08:34:08 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/05/2022 08:34:12 - INFO - codeparrot_training - Step 36668: {'lr': 0.0004353072594058243, 'samples': 18774528, 'steps': 36668, 'loss/train': 2.5402162075042725} +03/05/2022 08:34:15 - INFO - codeparrot_training - Step 36669: {'lr': 0.0004353036972002671, 'samples': 18775040, 'steps': 36669, 'loss/train': 2.001192569732666} +03/05/2022 08:34:17 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/05/2022 08:34:20 - INFO - codeparrot_training - Step 36670: {'lr': 0.00043530013491121497, 'samples': 18775552, 'steps': 36670, 'loss/train': 1.9092366695404053} +03/05/2022 08:34:23 - INFO - codeparrot_training - Step 36671: {'lr': 0.00043529657253866936, 'samples': 18776064, 'steps': 36671, 'loss/train': 1.537556767463684} +03/05/2022 08:34:25 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/05/2022 08:34:29 - INFO - codeparrot_training - Step 36672: {'lr': 0.000435293010082632, 'samples': 18776576, 'steps': 36672, 'loss/train': 0.7188050150871277} +03/05/2022 08:34:32 - INFO - codeparrot_training - Step 36673: {'lr': 0.0004352894475431045, 'samples': 18777088, 'steps': 36673, 'loss/train': 0.49456536769866943} +03/05/2022 08:34:34 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/05/2022 08:34:37 - INFO - codeparrot_training - Step 36674: {'lr': 0.0004352858849200885, 'samples': 18777600, 'steps': 36674, 'loss/train': 1.6767886877059937} +03/05/2022 08:34:41 - INFO - codeparrot_training - Step 36675: {'lr': 0.0004352823222135854, 'samples': 18778112, 'steps': 36675, 'loss/train': 1.0852221250534058} +03/05/2022 08:34:42 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/05/2022 08:34:46 - INFO - codeparrot_training - Step 36676: {'lr': 0.00043527875942359697, 'samples': 18778624, 'steps': 36676, 'loss/train': 1.648712396621704} +03/05/2022 08:34:49 - INFO - codeparrot_training - Step 36677: {'lr': 0.0004352751965501248, 'samples': 18779136, 'steps': 36677, 'loss/train': 1.6284223794937134} +03/05/2022 08:34:51 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 08:34:54 - INFO - codeparrot_training - Step 36678: {'lr': 0.0004352716335931706, 'samples': 18779648, 'steps': 36678, 'loss/train': 2.3753294944763184} +03/05/2022 08:34:58 - INFO - codeparrot_training - Step 36679: {'lr': 0.0004352680705527357, 'samples': 18780160, 'steps': 36679, 'loss/train': 1.6806747913360596} +03/05/2022 08:35:00 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 08:35:03 - INFO - codeparrot_training - Step 36680: {'lr': 0.00043526450742882193, 'samples': 18780672, 'steps': 36680, 'loss/train': 2.032686233520508} +03/05/2022 08:35:06 - INFO - codeparrot_training - Step 36681: {'lr': 0.0004352609442214309, 'samples': 18781184, 'steps': 36681, 'loss/train': 1.3099148273468018} +03/05/2022 08:35:08 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/05/2022 08:35:11 - INFO - codeparrot_training - Step 36682: {'lr': 0.00043525738093056404, 'samples': 18781696, 'steps': 36682, 'loss/train': 2.1943650245666504} +03/05/2022 08:35:14 - INFO - codeparrot_training - Step 36683: {'lr': 0.0004352538175562231, 'samples': 18782208, 'steps': 36683, 'loss/train': 1.966793179512024} +03/05/2022 08:35:17 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 08:35:20 - INFO - codeparrot_training - Step 36684: {'lr': 0.00043525025409840967, 'samples': 18782720, 'steps': 36684, 'loss/train': 1.1217139959335327} +03/05/2022 08:35:23 - INFO - codeparrot_training - Step 36685: {'lr': 0.00043524669055712534, 'samples': 18783232, 'steps': 36685, 'loss/train': 1.6760820150375366} +03/05/2022 08:35:26 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 08:35:28 - INFO - codeparrot_training - Step 36686: {'lr': 0.00043524312693237166, 'samples': 18783744, 'steps': 36686, 'loss/train': 0.9233697056770325} +03/05/2022 08:35:31 - INFO - codeparrot_training - Step 36687: {'lr': 0.0004352395632241504, 'samples': 18784256, 'steps': 36687, 'loss/train': 1.7965147495269775} +03/05/2022 08:35:34 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 08:35:37 - INFO - codeparrot_training - Step 36688: {'lr': 0.00043523599943246297, 'samples': 18784768, 'steps': 36688, 'loss/train': 1.6315757036209106} +03/05/2022 08:35:40 - INFO - codeparrot_training - Step 36689: {'lr': 0.00043523243555731094, 'samples': 18785280, 'steps': 36689, 'loss/train': 1.9857407808303833} +03/05/2022 08:35:42 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/05/2022 08:35:45 - INFO - codeparrot_training - Step 36690: {'lr': 0.00043522887159869617, 'samples': 18785792, 'steps': 36690, 'loss/train': 1.612954020500183} +03/05/2022 08:35:48 - INFO - codeparrot_training - Step 36691: {'lr': 0.00043522530755662017, 'samples': 18786304, 'steps': 36691, 'loss/train': 1.1465319395065308} +03/05/2022 08:35:50 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 08:35:54 - INFO - codeparrot_training - Step 36692: {'lr': 0.00043522174343108445, 'samples': 18786816, 'steps': 36692, 'loss/train': 2.0332961082458496} +03/05/2022 08:35:57 - INFO - codeparrot_training - Step 36693: {'lr': 0.00043521817922209064, 'samples': 18787328, 'steps': 36693, 'loss/train': 1.6126130819320679} +03/05/2022 08:35:59 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/05/2022 08:36:02 - INFO - codeparrot_training - Step 36694: {'lr': 0.00043521461492964037, 'samples': 18787840, 'steps': 36694, 'loss/train': 1.2714495658874512} +03/05/2022 08:36:05 - INFO - codeparrot_training - Step 36695: {'lr': 0.00043521105055373526, 'samples': 18788352, 'steps': 36695, 'loss/train': 1.5185444355010986} +03/05/2022 08:36:07 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/05/2022 08:36:11 - INFO - codeparrot_training - Step 36696: {'lr': 0.000435207486094377, 'samples': 18788864, 'steps': 36696, 'loss/train': 1.5647741556167603} +03/05/2022 08:36:14 - INFO - codeparrot_training - Step 36697: {'lr': 0.00043520392155156694, 'samples': 18789376, 'steps': 36697, 'loss/train': 1.5304478406906128} +03/05/2022 08:36:16 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/05/2022 08:36:19 - INFO - codeparrot_training - Step 36698: {'lr': 0.000435200356925307, 'samples': 18789888, 'steps': 36698, 'loss/train': 1.0248026847839355} +03/05/2022 08:36:22 - INFO - codeparrot_training - Step 36699: {'lr': 0.0004351967922155986, 'samples': 18790400, 'steps': 36699, 'loss/train': 1.649327278137207} +03/05/2022 08:36:24 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 08:36:27 - INFO - codeparrot_training - Step 36700: {'lr': 0.0004351932274224434, 'samples': 18790912, 'steps': 36700, 'loss/train': 1.4625699520111084} +03/05/2022 08:36:31 - INFO - codeparrot_training - Step 36701: {'lr': 0.0004351896625458429, 'samples': 18791424, 'steps': 36701, 'loss/train': 1.0550603866577148} +03/05/2022 08:36:33 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/05/2022 08:36:36 - INFO - codeparrot_training - Step 36702: {'lr': 0.0004351860975857989, 'samples': 18791936, 'steps': 36702, 'loss/train': 1.3552054166793823} +03/05/2022 08:36:39 - INFO - codeparrot_training - Step 36703: {'lr': 0.00043518253254231276, 'samples': 18792448, 'steps': 36703, 'loss/train': 1.5167899131774902} +03/05/2022 08:36:41 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/05/2022 08:36:44 - INFO - codeparrot_training - Step 36704: {'lr': 0.00043517896741538634, 'samples': 18792960, 'steps': 36704, 'loss/train': 1.8610939979553223} +03/05/2022 08:36:47 - INFO - codeparrot_training - Step 36705: {'lr': 0.0004351754022050212, 'samples': 18793472, 'steps': 36705, 'loss/train': 2.204897880554199} +03/05/2022 08:36:50 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/05/2022 08:36:53 - INFO - codeparrot_training - Step 36706: {'lr': 0.00043517183691121875, 'samples': 18793984, 'steps': 36706, 'loss/train': 1.5272445678710938} +03/05/2022 08:36:56 - INFO - codeparrot_training - Step 36707: {'lr': 0.00043516827153398073, 'samples': 18794496, 'steps': 36707, 'loss/train': 1.2016654014587402} +03/05/2022 08:36:58 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 08:37:01 - INFO - codeparrot_training - Step 36708: {'lr': 0.0004351647060733088, 'samples': 18795008, 'steps': 36708, 'loss/train': 1.6325432062149048} +03/05/2022 08:37:04 - INFO - codeparrot_training - Step 36709: {'lr': 0.00043516114052920453, 'samples': 18795520, 'steps': 36709, 'loss/train': 1.5337028503417969} +03/05/2022 08:37:06 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 08:37:10 - INFO - codeparrot_training - Step 36710: {'lr': 0.00043515757490166944, 'samples': 18796032, 'steps': 36710, 'loss/train': 1.787655234336853} +03/05/2022 08:37:13 - INFO - codeparrot_training - Step 36711: {'lr': 0.00043515400919070526, 'samples': 18796544, 'steps': 36711, 'loss/train': 1.1704010963439941} +03/05/2022 08:37:16 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/05/2022 08:37:19 - INFO - codeparrot_training - Step 36712: {'lr': 0.0004351504433963135, 'samples': 18797056, 'steps': 36712, 'loss/train': 0.24939727783203125} +03/05/2022 08:37:22 - INFO - codeparrot_training - Step 36713: {'lr': 0.0004351468775184959, 'samples': 18797568, 'steps': 36713, 'loss/train': 3.1073429584503174} +03/05/2022 08:37:25 - INFO - codeparrot_training - Step 36714: {'lr': 0.0004351433115572538, 'samples': 18798080, 'steps': 36714, 'loss/train': 1.1566107273101807} +03/05/2022 08:37:25 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/05/2022 08:37:30 - INFO - codeparrot_training - Step 36715: {'lr': 0.00043513974551258913, 'samples': 18798592, 'steps': 36715, 'loss/train': 2.4987802505493164} +03/05/2022 08:37:33 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 08:37:36 - INFO - codeparrot_training - Step 36716: {'lr': 0.00043513617938450327, 'samples': 18799104, 'steps': 36716, 'loss/train': 1.4679028987884521} +03/05/2022 08:37:39 - INFO - codeparrot_training - Step 36717: {'lr': 0.00043513261317299797, 'samples': 18799616, 'steps': 36717, 'loss/train': 2.269044876098633} +03/05/2022 08:37:42 - INFO - codeparrot_training - Step 36718: {'lr': 0.00043512904687807475, 'samples': 18800128, 'steps': 36718, 'loss/train': 2.2696533203125} +03/05/2022 08:37:43 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/05/2022 08:37:47 - INFO - codeparrot_training - Step 36719: {'lr': 0.00043512548049973523, 'samples': 18800640, 'steps': 36719, 'loss/train': 1.6756986379623413} +03/05/2022 08:37:51 - INFO - codeparrot_training - Step 36720: {'lr': 0.00043512191403798095, 'samples': 18801152, 'steps': 36720, 'loss/train': 1.552201509475708} +03/05/2022 08:37:51 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 08:37:56 - INFO - codeparrot_training - Step 36721: {'lr': 0.0004351183474928137, 'samples': 18801664, 'steps': 36721, 'loss/train': 1.1354302167892456} +03/05/2022 08:37:59 - INFO - codeparrot_training - Step 36722: {'lr': 0.00043511478086423493, 'samples': 18802176, 'steps': 36722, 'loss/train': 1.8882936239242554} +03/05/2022 08:38:00 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/05/2022 08:38:04 - INFO - codeparrot_training - Step 36723: {'lr': 0.0004351112141522463, 'samples': 18802688, 'steps': 36723, 'loss/train': 1.8290765285491943} +03/05/2022 08:38:08 - INFO - codeparrot_training - Step 36724: {'lr': 0.00043510764735684945, 'samples': 18803200, 'steps': 36724, 'loss/train': 1.184261441230774} +03/05/2022 08:38:08 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/05/2022 08:38:13 - INFO - codeparrot_training - Step 36725: {'lr': 0.0004351040804780459, 'samples': 18803712, 'steps': 36725, 'loss/train': 1.8711128234863281} +03/05/2022 08:38:16 - INFO - codeparrot_training - Step 36726: {'lr': 0.00043510051351583733, 'samples': 18804224, 'steps': 36726, 'loss/train': 1.0512408018112183} +03/05/2022 08:38:17 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/05/2022 08:38:21 - INFO - codeparrot_training - Step 36727: {'lr': 0.0004350969464702254, 'samples': 18804736, 'steps': 36727, 'loss/train': 1.9921079874038696} +03/05/2022 08:38:24 - INFO - codeparrot_training - Step 36728: {'lr': 0.0004350933793412115, 'samples': 18805248, 'steps': 36728, 'loss/train': 0.132417693734169} +03/05/2022 08:38:25 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/05/2022 08:38:30 - INFO - codeparrot_training - Step 36729: {'lr': 0.00043508981212879737, 'samples': 18805760, 'steps': 36729, 'loss/train': 1.6533011198043823} +03/05/2022 08:38:33 - INFO - codeparrot_training - Step 36730: {'lr': 0.0004350862448329848, 'samples': 18806272, 'steps': 36730, 'loss/train': 1.3240827322006226} +03/05/2022 08:38:33 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/05/2022 08:38:38 - INFO - codeparrot_training - Step 36731: {'lr': 0.00043508267745377504, 'samples': 18806784, 'steps': 36731, 'loss/train': 1.3252646923065186} +03/05/2022 08:38:41 - INFO - codeparrot_training - Step 36732: {'lr': 0.00043507910999117003, 'samples': 18807296, 'steps': 36732, 'loss/train': 0.8646374344825745} +03/05/2022 08:38:42 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 08:38:46 - INFO - codeparrot_training - Step 36733: {'lr': 0.00043507554244517113, 'samples': 18807808, 'steps': 36733, 'loss/train': 2.1603941917419434} +03/05/2022 08:38:50 - INFO - codeparrot_training - Step 36734: {'lr': 0.0004350719748157801, 'samples': 18808320, 'steps': 36734, 'loss/train': 1.5149245262145996} +03/05/2022 08:38:50 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 08:38:55 - INFO - codeparrot_training - Step 36735: {'lr': 0.00043506840710299844, 'samples': 18808832, 'steps': 36735, 'loss/train': 1.9402501583099365} +03/05/2022 08:38:58 - INFO - codeparrot_training - Step 36736: {'lr': 0.00043506483930682785, 'samples': 18809344, 'steps': 36736, 'loss/train': 1.213230848312378} +03/05/2022 08:38:58 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 08:39:03 - INFO - codeparrot_training - Step 36737: {'lr': 0.0004350612714272699, 'samples': 18809856, 'steps': 36737, 'loss/train': 0.8067896962165833} +03/05/2022 08:39:06 - INFO - codeparrot_training - Step 36738: {'lr': 0.0004350577034643262, 'samples': 18810368, 'steps': 36738, 'loss/train': 1.1526211500167847} +03/05/2022 08:39:07 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/05/2022 08:39:12 - INFO - codeparrot_training - Step 36739: {'lr': 0.0004350541354179983, 'samples': 18810880, 'steps': 36739, 'loss/train': 2.2019336223602295} +03/05/2022 08:39:15 - INFO - codeparrot_training - Step 36740: {'lr': 0.00043505056728828794, 'samples': 18811392, 'steps': 36740, 'loss/train': 1.470820426940918} +03/05/2022 08:39:15 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/05/2022 08:39:20 - INFO - codeparrot_training - Step 36741: {'lr': 0.0004350469990751966, 'samples': 18811904, 'steps': 36741, 'loss/train': 1.558790922164917} +03/05/2022 08:39:23 - INFO - codeparrot_training - Step 36742: {'lr': 0.000435043430778726, 'samples': 18812416, 'steps': 36742, 'loss/train': 1.9770536422729492} +03/05/2022 08:39:23 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/05/2022 08:39:28 - INFO - codeparrot_training - Step 36743: {'lr': 0.00043503986239887765, 'samples': 18812928, 'steps': 36743, 'loss/train': 1.3653024435043335} +03/05/2022 08:39:32 - INFO - codeparrot_training - Step 36744: {'lr': 0.0004350362939356532, 'samples': 18813440, 'steps': 36744, 'loss/train': 1.638286828994751} +03/05/2022 08:39:32 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/05/2022 08:39:37 - INFO - codeparrot_training - Step 36745: {'lr': 0.00043503272538905423, 'samples': 18813952, 'steps': 36745, 'loss/train': 1.5700247287750244} +03/05/2022 08:39:40 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/05/2022 08:39:42 - INFO - codeparrot_training - Step 36746: {'lr': 0.0004350291567590824, 'samples': 18814464, 'steps': 36746, 'loss/train': 1.4321941137313843} +03/05/2022 08:39:45 - INFO - codeparrot_training - Step 36747: {'lr': 0.00043502558804573924, 'samples': 18814976, 'steps': 36747, 'loss/train': 1.7214068174362183} +03/05/2022 08:39:48 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 08:39:51 - INFO - codeparrot_training - Step 36748: {'lr': 0.0004350220192490264, 'samples': 18815488, 'steps': 36748, 'loss/train': 1.472598910331726} +03/05/2022 08:39:54 - INFO - codeparrot_training - Step 36749: {'lr': 0.00043501845036894555, 'samples': 18816000, 'steps': 36749, 'loss/train': 2.095392942428589} +03/05/2022 08:39:56 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 08:39:59 - INFO - codeparrot_training - Step 36750: {'lr': 0.00043501488140549824, 'samples': 18816512, 'steps': 36750, 'loss/train': 1.8702508211135864} +03/05/2022 08:40:02 - INFO - codeparrot_training - Step 36751: {'lr': 0.000435011312358686, 'samples': 18817024, 'steps': 36751, 'loss/train': 1.7569918632507324} +03/05/2022 08:40:05 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 08:40:07 - INFO - codeparrot_training - Step 36752: {'lr': 0.0004350077432285106, 'samples': 18817536, 'steps': 36752, 'loss/train': 2.292306661605835} +03/05/2022 08:40:11 - INFO - codeparrot_training - Step 36753: {'lr': 0.0004350041740149735, 'samples': 18818048, 'steps': 36753, 'loss/train': 1.3701976537704468} +03/05/2022 08:40:13 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/05/2022 08:40:16 - INFO - codeparrot_training - Step 36754: {'lr': 0.00043500060471807645, 'samples': 18818560, 'steps': 36754, 'loss/train': 1.4870412349700928} +03/05/2022 08:40:19 - INFO - codeparrot_training - Step 36755: {'lr': 0.000434997035337821, 'samples': 18819072, 'steps': 36755, 'loss/train': 1.5104273557662964} +03/05/2022 08:40:22 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 08:40:24 - INFO - codeparrot_training - Step 36756: {'lr': 0.0004349934658742086, 'samples': 18819584, 'steps': 36756, 'loss/train': 1.3527086973190308} +03/05/2022 08:40:27 - INFO - codeparrot_training - Step 36757: {'lr': 0.00043498989632724105, 'samples': 18820096, 'steps': 36757, 'loss/train': 1.6256399154663086} +03/05/2022 08:40:30 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/05/2022 08:40:33 - INFO - codeparrot_training - Step 36758: {'lr': 0.00043498632669692, 'samples': 18820608, 'steps': 36758, 'loss/train': 1.9504210948944092} +03/05/2022 08:40:36 - INFO - codeparrot_training - Step 36759: {'lr': 0.0004349827569832469, 'samples': 18821120, 'steps': 36759, 'loss/train': 2.1131386756896973} +03/05/2022 08:40:39 - INFO - codeparrot_training - Step 36760: {'lr': 0.00043497918718622344, 'samples': 18821632, 'steps': 36760, 'loss/train': 1.8500378131866455} +03/05/2022 08:40:41 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 08:40:45 - INFO - codeparrot_training - Step 36761: {'lr': 0.0004349756173058512, 'samples': 18822144, 'steps': 36761, 'loss/train': 2.0238864421844482} +03/05/2022 08:40:48 - INFO - codeparrot_training - Step 36762: {'lr': 0.0004349720473421318, 'samples': 18822656, 'steps': 36762, 'loss/train': 1.518053412437439} +03/05/2022 08:40:49 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/05/2022 08:40:53 - INFO - codeparrot_training - Step 36763: {'lr': 0.00043496847729506685, 'samples': 18823168, 'steps': 36763, 'loss/train': 1.8443150520324707} +03/05/2022 08:40:56 - INFO - codeparrot_training - Step 36764: {'lr': 0.000434964907164658, 'samples': 18823680, 'steps': 36764, 'loss/train': 2.555396556854248} +03/05/2022 08:40:58 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/05/2022 08:41:02 - INFO - codeparrot_training - Step 36765: {'lr': 0.0004349613369509067, 'samples': 18824192, 'steps': 36765, 'loss/train': 1.7638988494873047} +03/05/2022 08:41:05 - INFO - codeparrot_training - Step 36766: {'lr': 0.0004349577666538148, 'samples': 18824704, 'steps': 36766, 'loss/train': 2.237107992172241} +03/05/2022 08:41:06 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/05/2022 08:41:10 - INFO - codeparrot_training - Step 36767: {'lr': 0.0004349541962733837, 'samples': 18825216, 'steps': 36767, 'loss/train': 1.9983699321746826} +03/05/2022 08:41:13 - INFO - codeparrot_training - Step 36768: {'lr': 0.0004349506258096152, 'samples': 18825728, 'steps': 36768, 'loss/train': 1.5734697580337524} +03/05/2022 08:41:15 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 08:41:19 - INFO - codeparrot_training - Step 36769: {'lr': 0.00043494705526251064, 'samples': 18826240, 'steps': 36769, 'loss/train': 1.0660656690597534} +03/05/2022 08:41:22 - INFO - codeparrot_training - Step 36770: {'lr': 0.00043494348463207197, 'samples': 18826752, 'steps': 36770, 'loss/train': 1.5581581592559814} +03/05/2022 08:41:23 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 08:41:27 - INFO - codeparrot_training - Step 36771: {'lr': 0.0004349399139183005, 'samples': 18827264, 'steps': 36771, 'loss/train': 0.9426736831665039} +03/05/2022 08:41:30 - INFO - codeparrot_training - Step 36772: {'lr': 0.000434936343121198, 'samples': 18827776, 'steps': 36772, 'loss/train': 1.6663047075271606} +03/05/2022 08:41:32 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/05/2022 08:41:35 - INFO - codeparrot_training - Step 36773: {'lr': 0.000434932772240766, 'samples': 18828288, 'steps': 36773, 'loss/train': 0.2263745814561844} +03/05/2022 08:41:39 - INFO - codeparrot_training - Step 36774: {'lr': 0.0004349292012770062, 'samples': 18828800, 'steps': 36774, 'loss/train': 1.420348048210144} +03/05/2022 08:41:40 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 08:41:44 - INFO - codeparrot_training - Step 36775: {'lr': 0.00043492563022992013, 'samples': 18829312, 'steps': 36775, 'loss/train': 1.5060628652572632} +03/05/2022 08:41:47 - INFO - codeparrot_training - Step 36776: {'lr': 0.00043492205909950943, 'samples': 18829824, 'steps': 36776, 'loss/train': 1.8419787883758545} +03/05/2022 08:41:48 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/05/2022 08:41:52 - INFO - codeparrot_training - Step 36777: {'lr': 0.0004349184878857757, 'samples': 18830336, 'steps': 36777, 'loss/train': 1.6692277193069458} +03/05/2022 08:41:56 - INFO - codeparrot_training - Step 36778: {'lr': 0.0004349149165887205, 'samples': 18830848, 'steps': 36778, 'loss/train': 1.6683770418167114} +03/05/2022 08:41:57 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 08:42:01 - INFO - codeparrot_training - Step 36779: {'lr': 0.0004349113452083456, 'samples': 18831360, 'steps': 36779, 'loss/train': 0.6113933324813843} +03/05/2022 08:42:04 - INFO - codeparrot_training - Step 36780: {'lr': 0.00043490777374465244, 'samples': 18831872, 'steps': 36780, 'loss/train': 1.6047180891036987} +03/05/2022 08:42:05 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/05/2022 08:42:09 - INFO - codeparrot_training - Step 36781: {'lr': 0.0004349042021976427, 'samples': 18832384, 'steps': 36781, 'loss/train': 1.9414072036743164} +03/05/2022 08:42:12 - INFO - codeparrot_training - Step 36782: {'lr': 0.000434900630567318, 'samples': 18832896, 'steps': 36782, 'loss/train': 1.4783483743667603} +03/05/2022 08:42:14 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 08:42:18 - INFO - codeparrot_training - Step 36783: {'lr': 0.00043489705885367986, 'samples': 18833408, 'steps': 36783, 'loss/train': 1.5761252641677856} +03/05/2022 08:42:21 - INFO - codeparrot_training - Step 36784: {'lr': 0.00043489348705673, 'samples': 18833920, 'steps': 36784, 'loss/train': 2.3001670837402344} +03/05/2022 08:42:22 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 08:42:27 - INFO - codeparrot_training - Step 36785: {'lr': 0.00043488991517647, 'samples': 18834432, 'steps': 36785, 'loss/train': 2.4192380905151367} +03/05/2022 08:42:30 - INFO - codeparrot_training - Step 36786: {'lr': 0.00043488634321290146, 'samples': 18834944, 'steps': 36786, 'loss/train': 1.9931823015213013} +03/05/2022 08:42:33 - INFO - codeparrot_training - Step 36787: {'lr': 0.000434882771166026, 'samples': 18835456, 'steps': 36787, 'loss/train': 0.9560977816581726} +03/05/2022 08:42:33 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/05/2022 08:42:38 - INFO - codeparrot_training - Step 36788: {'lr': 0.00043487919903584515, 'samples': 18835968, 'steps': 36788, 'loss/train': 1.1297003030776978} +03/05/2022 08:42:41 - INFO - codeparrot_training - Step 36789: {'lr': 0.00043487562682236066, 'samples': 18836480, 'steps': 36789, 'loss/train': 1.416741132736206} +03/05/2022 08:42:41 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 08:42:47 - INFO - codeparrot_training - Step 36790: {'lr': 0.000434872054525574, 'samples': 18836992, 'steps': 36790, 'loss/train': 1.4848045110702515} +03/05/2022 08:42:49 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 08:42:52 - INFO - codeparrot_training - Step 36791: {'lr': 0.00043486848214548693, 'samples': 18837504, 'steps': 36791, 'loss/train': 1.6882257461547852} +03/05/2022 08:42:55 - INFO - codeparrot_training - Step 36792: {'lr': 0.0004348649096821009, 'samples': 18838016, 'steps': 36792, 'loss/train': 1.5576146841049194} +03/05/2022 08:42:58 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/05/2022 08:43:00 - INFO - codeparrot_training - Step 36793: {'lr': 0.0004348613371354176, 'samples': 18838528, 'steps': 36793, 'loss/train': 1.9979488849639893} +03/05/2022 08:43:04 - INFO - codeparrot_training - Step 36794: {'lr': 0.0004348577645054387, 'samples': 18839040, 'steps': 36794, 'loss/train': 2.000835657119751} +03/05/2022 08:43:06 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/05/2022 08:43:09 - INFO - codeparrot_training - Step 36795: {'lr': 0.0004348541917921657, 'samples': 18839552, 'steps': 36795, 'loss/train': 1.9691299200057983} +03/05/2022 08:43:12 - INFO - codeparrot_training - Step 36796: {'lr': 0.0004348506189956002, 'samples': 18840064, 'steps': 36796, 'loss/train': 2.1404619216918945} +03/05/2022 08:43:15 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/05/2022 08:43:17 - INFO - codeparrot_training - Step 36797: {'lr': 0.0004348470461157439, 'samples': 18840576, 'steps': 36797, 'loss/train': 1.600678563117981} +03/05/2022 08:43:21 - INFO - codeparrot_training - Step 36798: {'lr': 0.0004348434731525984, 'samples': 18841088, 'steps': 36798, 'loss/train': 1.7130168676376343} +03/05/2022 08:43:23 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 08:43:26 - INFO - codeparrot_training - Step 36799: {'lr': 0.00043483990010616524, 'samples': 18841600, 'steps': 36799, 'loss/train': 1.6656779050827026} +03/05/2022 08:43:29 - INFO - codeparrot_training - Step 36800: {'lr': 0.00043483632697644616, 'samples': 18842112, 'steps': 36800, 'loss/train': 1.653520107269287} +03/05/2022 08:43:32 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 08:43:34 - INFO - codeparrot_training - Step 36801: {'lr': 0.00043483275376344257, 'samples': 18842624, 'steps': 36801, 'loss/train': 0.8258717656135559} +03/05/2022 08:43:37 - INFO - codeparrot_training - Step 36802: {'lr': 0.00043482918046715627, 'samples': 18843136, 'steps': 36802, 'loss/train': 0.6811335682868958} +03/05/2022 08:43:40 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 08:43:43 - INFO - codeparrot_training - Step 36803: {'lr': 0.00043482560708758876, 'samples': 18843648, 'steps': 36803, 'loss/train': 1.8351478576660156} +03/05/2022 08:43:46 - INFO - codeparrot_training - Step 36804: {'lr': 0.0004348220336247417, 'samples': 18844160, 'steps': 36804, 'loss/train': 2.422506332397461} +03/05/2022 08:43:48 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 08:43:51 - INFO - codeparrot_training - Step 36805: {'lr': 0.0004348184600786167, 'samples': 18844672, 'steps': 36805, 'loss/train': 1.8859761953353882} +03/05/2022 08:43:54 - INFO - codeparrot_training - Step 36806: {'lr': 0.0004348148864492153, 'samples': 18845184, 'steps': 36806, 'loss/train': 2.145763635635376} +03/05/2022 08:43:57 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/05/2022 08:44:00 - INFO - codeparrot_training - Step 36807: {'lr': 0.00043481131273653926, 'samples': 18845696, 'steps': 36807, 'loss/train': 1.997817039489746} +03/05/2022 08:44:03 - INFO - codeparrot_training - Step 36808: {'lr': 0.00043480773894059, 'samples': 18846208, 'steps': 36808, 'loss/train': 1.3425543308258057} +03/05/2022 08:44:05 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 08:44:08 - INFO - codeparrot_training - Step 36809: {'lr': 0.0004348041650613692, 'samples': 18846720, 'steps': 36809, 'loss/train': 1.0338274240493774} +03/05/2022 08:44:11 - INFO - codeparrot_training - Step 36810: {'lr': 0.0004348005910988786, 'samples': 18847232, 'steps': 36810, 'loss/train': 1.562526822090149} +03/05/2022 08:44:13 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 08:44:16 - INFO - codeparrot_training - Step 36811: {'lr': 0.0004347970170531197, 'samples': 18847744, 'steps': 36811, 'loss/train': 1.9032257795333862} +03/05/2022 08:44:20 - INFO - codeparrot_training - Step 36812: {'lr': 0.000434793442924094, 'samples': 18848256, 'steps': 36812, 'loss/train': 0.7825396656990051} +03/05/2022 08:44:21 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 08:44:25 - INFO - codeparrot_training - Step 36813: {'lr': 0.0004347898687118033, 'samples': 18848768, 'steps': 36813, 'loss/train': 1.714695692062378} +03/05/2022 08:44:28 - INFO - codeparrot_training - Step 36814: {'lr': 0.0004347862944162492, 'samples': 18849280, 'steps': 36814, 'loss/train': 1.0270723104476929} +03/05/2022 08:44:30 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/05/2022 08:44:33 - INFO - codeparrot_training - Step 36815: {'lr': 0.00043478272003743315, 'samples': 18849792, 'steps': 36815, 'loss/train': 1.754807710647583} +03/05/2022 08:44:37 - INFO - codeparrot_training - Step 36816: {'lr': 0.0004347791455753569, 'samples': 18850304, 'steps': 36816, 'loss/train': 1.785197138786316} +03/05/2022 08:44:39 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 08:44:42 - INFO - codeparrot_training - Step 36817: {'lr': 0.00043477557103002197, 'samples': 18850816, 'steps': 36817, 'loss/train': 1.464093804359436} +03/05/2022 08:44:45 - INFO - codeparrot_training - Step 36818: {'lr': 0.00043477199640143004, 'samples': 18851328, 'steps': 36818, 'loss/train': 2.0849695205688477} +03/05/2022 08:44:47 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/05/2022 08:44:50 - INFO - codeparrot_training - Step 36819: {'lr': 0.00043476842168958276, 'samples': 18851840, 'steps': 36819, 'loss/train': 1.8952441215515137} +03/05/2022 08:44:53 - INFO - codeparrot_training - Step 36820: {'lr': 0.0004347648468944816, 'samples': 18852352, 'steps': 36820, 'loss/train': 1.5244258642196655} +03/05/2022 08:44:56 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/05/2022 08:44:59 - INFO - codeparrot_training - Step 36821: {'lr': 0.0004347612720161283, 'samples': 18852864, 'steps': 36821, 'loss/train': 2.0413427352905273} +03/05/2022 08:45:02 - INFO - codeparrot_training - Step 36822: {'lr': 0.00043475769705452437, 'samples': 18853376, 'steps': 36822, 'loss/train': 2.0477137565612793} +03/05/2022 08:45:04 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 08:45:07 - INFO - codeparrot_training - Step 36823: {'lr': 0.00043475412200967155, 'samples': 18853888, 'steps': 36823, 'loss/train': 1.9837555885314941} +03/05/2022 08:45:10 - INFO - codeparrot_training - Step 36824: {'lr': 0.00043475054688157136, 'samples': 18854400, 'steps': 36824, 'loss/train': 1.7392351627349854} +03/05/2022 08:45:13 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/05/2022 08:45:16 - INFO - codeparrot_training - Step 36825: {'lr': 0.00043474697167022536, 'samples': 18854912, 'steps': 36825, 'loss/train': 1.266484260559082} +03/05/2022 08:45:19 - INFO - codeparrot_training - Step 36826: {'lr': 0.0004347433963756353, 'samples': 18855424, 'steps': 36826, 'loss/train': 1.501827597618103} +03/05/2022 08:45:21 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/05/2022 08:45:24 - INFO - codeparrot_training - Step 36827: {'lr': 0.0004347398209978027, 'samples': 18855936, 'steps': 36827, 'loss/train': 2.126845121383667} +03/05/2022 08:45:27 - INFO - codeparrot_training - Step 36828: {'lr': 0.0004347362455367292, 'samples': 18856448, 'steps': 36828, 'loss/train': 1.9292513132095337} +03/05/2022 08:45:29 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 08:45:32 - INFO - codeparrot_training - Step 36829: {'lr': 0.0004347326699924163, 'samples': 18856960, 'steps': 36829, 'loss/train': 2.057068347930908} +03/05/2022 08:45:36 - INFO - codeparrot_training - Step 36830: {'lr': 0.0004347290943648658, 'samples': 18857472, 'steps': 36830, 'loss/train': 2.2075035572052} +03/05/2022 08:45:37 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 08:45:41 - INFO - codeparrot_training - Step 36831: {'lr': 0.00043472551865407917, 'samples': 18857984, 'steps': 36831, 'loss/train': 1.630979299545288} +03/05/2022 08:45:44 - INFO - codeparrot_training - Step 36832: {'lr': 0.0004347219428600581, 'samples': 18858496, 'steps': 36832, 'loss/train': 1.6228761672973633} +03/05/2022 08:45:46 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/05/2022 08:45:49 - INFO - codeparrot_training - Step 36833: {'lr': 0.0004347183669828042, 'samples': 18859008, 'steps': 36833, 'loss/train': 1.7341316938400269} +03/05/2022 08:45:52 - INFO - codeparrot_training - Step 36834: {'lr': 0.00043471479102231904, 'samples': 18859520, 'steps': 36834, 'loss/train': 2.6173717975616455} +03/05/2022 08:45:54 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/05/2022 08:45:58 - INFO - codeparrot_training - Step 36835: {'lr': 0.0004347112149786042, 'samples': 18860032, 'steps': 36835, 'loss/train': 1.555933952331543} +03/05/2022 08:46:01 - INFO - codeparrot_training - Step 36836: {'lr': 0.0004347076388516614, 'samples': 18860544, 'steps': 36836, 'loss/train': 0.9737286567687988} +03/05/2022 08:46:02 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 08:46:06 - INFO - codeparrot_training - Step 36837: {'lr': 0.00043470406264149215, 'samples': 18861056, 'steps': 36837, 'loss/train': 2.0914418697357178} +03/05/2022 08:46:09 - INFO - codeparrot_training - Step 36838: {'lr': 0.00043470048634809813, 'samples': 18861568, 'steps': 36838, 'loss/train': 1.548585057258606} +03/05/2022 08:46:11 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 08:46:15 - INFO - codeparrot_training - Step 36839: {'lr': 0.00043469690997148086, 'samples': 18862080, 'steps': 36839, 'loss/train': 1.297490119934082} +03/05/2022 08:46:18 - INFO - codeparrot_training - Step 36840: {'lr': 0.00043469333351164207, 'samples': 18862592, 'steps': 36840, 'loss/train': 1.2585265636444092} +03/05/2022 08:46:19 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/05/2022 08:46:23 - INFO - codeparrot_training - Step 36841: {'lr': 0.0004346897569685833, 'samples': 18863104, 'steps': 36841, 'loss/train': 1.389769434928894} +03/05/2022 08:46:26 - INFO - codeparrot_training - Step 36842: {'lr': 0.00043468618034230613, 'samples': 18863616, 'steps': 36842, 'loss/train': 1.8585312366485596} +03/05/2022 08:46:27 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 08:46:31 - INFO - codeparrot_training - Step 36843: {'lr': 0.00043468260363281234, 'samples': 18864128, 'steps': 36843, 'loss/train': 1.4459878206253052} +03/05/2022 08:46:34 - INFO - codeparrot_training - Step 36844: {'lr': 0.0004346790268401033, 'samples': 18864640, 'steps': 36844, 'loss/train': 1.455426573753357} +03/05/2022 08:46:36 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/05/2022 08:46:40 - INFO - codeparrot_training - Step 36845: {'lr': 0.00043467544996418075, 'samples': 18865152, 'steps': 36845, 'loss/train': 1.1576275825500488} +03/05/2022 08:46:43 - INFO - codeparrot_training - Step 36846: {'lr': 0.0004346718730050463, 'samples': 18865664, 'steps': 36846, 'loss/train': 1.2133681774139404} +03/05/2022 08:46:44 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 08:46:48 - INFO - codeparrot_training - Step 36847: {'lr': 0.0004346682959627016, 'samples': 18866176, 'steps': 36847, 'loss/train': 1.1080384254455566} +03/05/2022 08:46:51 - INFO - codeparrot_training - Step 36848: {'lr': 0.0004346647188371482, 'samples': 18866688, 'steps': 36848, 'loss/train': 1.8695937395095825} +03/05/2022 08:46:53 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/05/2022 08:46:57 - INFO - codeparrot_training - Step 36849: {'lr': 0.00043466114162838765, 'samples': 18867200, 'steps': 36849, 'loss/train': 1.1918002367019653} +03/05/2022 08:47:00 - INFO - codeparrot_training - Step 36850: {'lr': 0.00043465756433642175, 'samples': 18867712, 'steps': 36850, 'loss/train': 1.6666258573532104} +03/05/2022 08:47:01 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/05/2022 08:47:05 - INFO - codeparrot_training - Step 36851: {'lr': 0.0004346539869612519, 'samples': 18868224, 'steps': 36851, 'loss/train': 2.2172508239746094} +03/05/2022 08:47:08 - INFO - codeparrot_training - Step 36852: {'lr': 0.0004346504095028799, 'samples': 18868736, 'steps': 36852, 'loss/train': 1.7251343727111816} +03/05/2022 08:47:09 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 08:47:13 - INFO - codeparrot_training - Step 36853: {'lr': 0.00043464683196130726, 'samples': 18869248, 'steps': 36853, 'loss/train': 3.0868568420410156} +03/05/2022 08:47:17 - INFO - codeparrot_training - Step 36854: {'lr': 0.00043464325433653563, 'samples': 18869760, 'steps': 36854, 'loss/train': 0.5785963535308838} +03/05/2022 08:47:17 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/05/2022 08:47:22 - INFO - codeparrot_training - Step 36855: {'lr': 0.0004346396766285665, 'samples': 18870272, 'steps': 36855, 'loss/train': 1.2966419458389282} +03/05/2022 08:47:25 - INFO - codeparrot_training - Step 36856: {'lr': 0.0004346360988374016, 'samples': 18870784, 'steps': 36856, 'loss/train': 1.8204244375228882} +03/05/2022 08:47:27 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 08:47:30 - INFO - codeparrot_training - Step 36857: {'lr': 0.0004346325209630426, 'samples': 18871296, 'steps': 36857, 'loss/train': 1.231301188468933} +03/05/2022 08:47:34 - INFO - codeparrot_training - Step 36858: {'lr': 0.00043462894300549097, 'samples': 18871808, 'steps': 36858, 'loss/train': 2.390886068344116} +03/05/2022 08:47:35 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 08:47:39 - INFO - codeparrot_training - Step 36859: {'lr': 0.0004346253649647485, 'samples': 18872320, 'steps': 36859, 'loss/train': 1.7816247940063477} +03/05/2022 08:47:42 - INFO - codeparrot_training - Step 36860: {'lr': 0.00043462178684081657, 'samples': 18872832, 'steps': 36860, 'loss/train': 1.5157221555709839} +03/05/2022 08:47:43 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 08:47:47 - INFO - codeparrot_training - Step 36861: {'lr': 0.00043461820863369697, 'samples': 18873344, 'steps': 36861, 'loss/train': 0.31511035561561584} +03/05/2022 08:47:50 - INFO - codeparrot_training - Step 36862: {'lr': 0.0004346146303433912, 'samples': 18873856, 'steps': 36862, 'loss/train': 1.6753982305526733} +03/05/2022 08:47:51 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/05/2022 08:47:56 - INFO - codeparrot_training - Step 36863: {'lr': 0.00043461105196990093, 'samples': 18874368, 'steps': 36863, 'loss/train': 1.4402275085449219} +03/05/2022 08:47:59 - INFO - codeparrot_training - Step 36864: {'lr': 0.0004346074735132278, 'samples': 18874880, 'steps': 36864, 'loss/train': 1.2896029949188232} +03/05/2022 08:48:00 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 08:48:04 - INFO - codeparrot_training - Step 36865: {'lr': 0.0004346038949733734, 'samples': 18875392, 'steps': 36865, 'loss/train': 1.9609742164611816} +03/05/2022 08:48:07 - INFO - codeparrot_training - Step 36866: {'lr': 0.0004346003163503393, 'samples': 18875904, 'steps': 36866, 'loss/train': 2.161099672317505} +03/05/2022 08:48:08 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 08:48:12 - INFO - codeparrot_training - Step 36867: {'lr': 0.00043459673764412713, 'samples': 18876416, 'steps': 36867, 'loss/train': 1.4677633047103882} +03/05/2022 08:48:16 - INFO - codeparrot_training - Step 36868: {'lr': 0.0004345931588547386, 'samples': 18876928, 'steps': 36868, 'loss/train': 1.6497422456741333} +03/05/2022 08:48:16 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 08:48:21 - INFO - codeparrot_training - Step 36869: {'lr': 0.00043458957998217517, 'samples': 18877440, 'steps': 36869, 'loss/train': 1.5824792385101318} +03/05/2022 08:48:24 - INFO - codeparrot_training - Step 36870: {'lr': 0.0004345860010264385, 'samples': 18877952, 'steps': 36870, 'loss/train': 1.2303905487060547} +03/05/2022 08:48:26 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/05/2022 08:48:30 - INFO - codeparrot_training - Step 36871: {'lr': 0.00043458242198753035, 'samples': 18878464, 'steps': 36871, 'loss/train': 2.087001323699951} +03/05/2022 08:48:33 - INFO - codeparrot_training - Step 36872: {'lr': 0.00043457884286545216, 'samples': 18878976, 'steps': 36872, 'loss/train': 1.6326885223388672} +03/05/2022 08:48:38 - INFO - codeparrot_training - Step 36873: {'lr': 0.0004345752636602055, 'samples': 18879488, 'steps': 36873, 'loss/train': 2.3659703731536865} +03/05/2022 08:48:41 - INFO - codeparrot_training - Step 36874: {'lr': 0.00043457168437179217, 'samples': 18880000, 'steps': 36874, 'loss/train': 2.2185091972351074} +03/05/2022 08:48:43 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/05/2022 08:48:46 - INFO - codeparrot_training - Step 36875: {'lr': 0.00043456810500021363, 'samples': 18880512, 'steps': 36875, 'loss/train': 1.7475706338882446} +03/05/2022 08:48:49 - INFO - codeparrot_training - Step 36876: {'lr': 0.00043456452554547153, 'samples': 18881024, 'steps': 36876, 'loss/train': 2.0513062477111816} +03/05/2022 08:48:51 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 08:48:55 - INFO - codeparrot_training - Step 36877: {'lr': 0.0004345609460075676, 'samples': 18881536, 'steps': 36877, 'loss/train': 2.246486186981201} +03/05/2022 08:48:58 - INFO - codeparrot_training - Step 36878: {'lr': 0.00043455736638650335, 'samples': 18882048, 'steps': 36878, 'loss/train': 1.3590720891952515} +03/05/2022 08:48:59 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/05/2022 08:49:03 - INFO - codeparrot_training - Step 36879: {'lr': 0.0004345537866822803, 'samples': 18882560, 'steps': 36879, 'loss/train': 2.12926983833313} +03/05/2022 08:49:06 - INFO - codeparrot_training - Step 36880: {'lr': 0.0004345502068949002, 'samples': 18883072, 'steps': 36880, 'loss/train': 1.9532108306884766} +03/05/2022 08:49:08 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/05/2022 08:49:12 - INFO - codeparrot_training - Step 36881: {'lr': 0.0004345466270243646, 'samples': 18883584, 'steps': 36881, 'loss/train': 1.7815518379211426} +03/05/2022 08:49:15 - INFO - codeparrot_training - Step 36882: {'lr': 0.0004345430470706753, 'samples': 18884096, 'steps': 36882, 'loss/train': 1.7593061923980713} +03/05/2022 08:49:16 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/05/2022 08:49:20 - INFO - codeparrot_training - Step 36883: {'lr': 0.00043453946703383354, 'samples': 18884608, 'steps': 36883, 'loss/train': 3.3658955097198486} +03/05/2022 08:49:23 - INFO - codeparrot_training - Step 36884: {'lr': 0.00043453588691384125, 'samples': 18885120, 'steps': 36884, 'loss/train': 1.5409979820251465} +03/05/2022 08:49:24 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/05/2022 08:49:28 - INFO - codeparrot_training - Step 36885: {'lr': 0.0004345323067106999, 'samples': 18885632, 'steps': 36885, 'loss/train': 1.5530903339385986} +03/05/2022 08:49:32 - INFO - codeparrot_training - Step 36886: {'lr': 0.00043452872642441124, 'samples': 18886144, 'steps': 36886, 'loss/train': 1.4350181818008423} +03/05/2022 08:49:33 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/05/2022 08:49:37 - INFO - codeparrot_training - Step 36887: {'lr': 0.0004345251460549766, 'samples': 18886656, 'steps': 36887, 'loss/train': 1.5833184719085693} +03/05/2022 08:49:40 - INFO - codeparrot_training - Step 36888: {'lr': 0.0004345215656023979, 'samples': 18887168, 'steps': 36888, 'loss/train': 1.290075421333313} +03/05/2022 08:49:42 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/05/2022 08:49:45 - INFO - codeparrot_training - Step 36889: {'lr': 0.0004345179850666766, 'samples': 18887680, 'steps': 36889, 'loss/train': 1.770676612854004} +03/05/2022 08:49:49 - INFO - codeparrot_training - Step 36890: {'lr': 0.0004345144044478144, 'samples': 18888192, 'steps': 36890, 'loss/train': 1.5681612491607666} +03/05/2022 08:49:50 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 08:49:54 - INFO - codeparrot_training - Step 36891: {'lr': 0.0004345108237458128, 'samples': 18888704, 'steps': 36891, 'loss/train': 2.8903255462646484} +03/05/2022 08:49:57 - INFO - codeparrot_training - Step 36892: {'lr': 0.00043450724296067344, 'samples': 18889216, 'steps': 36892, 'loss/train': 1.852860450744629} +03/05/2022 08:49:59 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/05/2022 08:50:02 - INFO - codeparrot_training - Step 36893: {'lr': 0.00043450366209239803, 'samples': 18889728, 'steps': 36893, 'loss/train': 1.6656923294067383} +03/05/2022 08:50:05 - INFO - codeparrot_training - Step 36894: {'lr': 0.0004345000811409881, 'samples': 18890240, 'steps': 36894, 'loss/train': 1.9010053873062134} +03/05/2022 08:50:07 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 08:50:11 - INFO - codeparrot_training - Step 36895: {'lr': 0.0004344965001064453, 'samples': 18890752, 'steps': 36895, 'loss/train': 1.136331558227539} +03/05/2022 08:50:14 - INFO - codeparrot_training - Step 36896: {'lr': 0.0004344929189887712, 'samples': 18891264, 'steps': 36896, 'loss/train': 1.5710711479187012} +03/05/2022 08:50:15 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/05/2022 08:50:19 - INFO - codeparrot_training - Step 36897: {'lr': 0.0004344893377879674, 'samples': 18891776, 'steps': 36897, 'loss/train': 1.7728002071380615} +03/05/2022 08:50:22 - INFO - codeparrot_training - Step 36898: {'lr': 0.00043448575650403555, 'samples': 18892288, 'steps': 36898, 'loss/train': 1.6509506702423096} +03/05/2022 08:50:24 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/05/2022 08:50:28 - INFO - codeparrot_training - Step 36899: {'lr': 0.00043448217513697727, 'samples': 18892800, 'steps': 36899, 'loss/train': 1.4198256731033325} +03/05/2022 08:50:31 - INFO - codeparrot_training - Step 36900: {'lr': 0.0004344785936867942, 'samples': 18893312, 'steps': 36900, 'loss/train': 1.507174015045166} +03/05/2022 08:50:32 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/05/2022 08:50:36 - INFO - codeparrot_training - Step 36901: {'lr': 0.00043447501215348794, 'samples': 18893824, 'steps': 36901, 'loss/train': 1.853712558746338} +03/05/2022 08:50:40 - INFO - codeparrot_training - Step 36902: {'lr': 0.00043447143053706007, 'samples': 18894336, 'steps': 36902, 'loss/train': 1.724510669708252} +03/05/2022 08:50:41 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 08:50:45 - INFO - codeparrot_training - Step 36903: {'lr': 0.00043446784883751223, 'samples': 18894848, 'steps': 36903, 'loss/train': 1.555845022201538} +03/05/2022 08:50:48 - INFO - codeparrot_training - Step 36904: {'lr': 0.000434464267054846, 'samples': 18895360, 'steps': 36904, 'loss/train': 1.8463436365127563} +03/05/2022 08:50:50 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/05/2022 08:50:53 - INFO - codeparrot_training - Step 36905: {'lr': 0.000434460685189063, 'samples': 18895872, 'steps': 36905, 'loss/train': 1.5736523866653442} +03/05/2022 08:50:56 - INFO - codeparrot_training - Step 36906: {'lr': 0.0004344571032401649, 'samples': 18896384, 'steps': 36906, 'loss/train': 2.0981554985046387} +03/05/2022 08:50:58 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 08:51:02 - INFO - codeparrot_training - Step 36907: {'lr': 0.0004344535212081533, 'samples': 18896896, 'steps': 36907, 'loss/train': 0.8352473974227905} +03/05/2022 08:51:05 - INFO - codeparrot_training - Step 36908: {'lr': 0.0004344499390930298, 'samples': 18897408, 'steps': 36908, 'loss/train': 1.240767240524292} +03/05/2022 08:51:06 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/05/2022 08:51:10 - INFO - codeparrot_training - Step 36909: {'lr': 0.0004344463568947959, 'samples': 18897920, 'steps': 36909, 'loss/train': 1.0035650730133057} +03/05/2022 08:51:13 - INFO - codeparrot_training - Step 36910: {'lr': 0.0004344427746134534, 'samples': 18898432, 'steps': 36910, 'loss/train': 2.763331890106201} +03/05/2022 08:51:15 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/05/2022 08:51:19 - INFO - codeparrot_training - Step 36911: {'lr': 0.0004344391922490037, 'samples': 18898944, 'steps': 36911, 'loss/train': 0.28242018818855286} +03/05/2022 08:51:22 - INFO - codeparrot_training - Step 36912: {'lr': 0.0004344356098014487, 'samples': 18899456, 'steps': 36912, 'loss/train': 1.8394434452056885} +03/05/2022 08:51:23 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/05/2022 08:51:27 - INFO - codeparrot_training - Step 36913: {'lr': 0.0004344320272707898, 'samples': 18899968, 'steps': 36913, 'loss/train': 1.5607521533966064} +03/05/2022 08:51:30 - INFO - codeparrot_training - Step 36914: {'lr': 0.0004344284446570287, 'samples': 18900480, 'steps': 36914, 'loss/train': 1.938348412513733} +03/05/2022 08:51:32 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 08:51:36 - INFO - codeparrot_training - Step 36915: {'lr': 0.00043442486196016697, 'samples': 18900992, 'steps': 36915, 'loss/train': 0.8162152171134949} +03/05/2022 08:51:39 - INFO - codeparrot_training - Step 36916: {'lr': 0.00043442127918020624, 'samples': 18901504, 'steps': 36916, 'loss/train': 1.7783260345458984} +03/05/2022 08:51:41 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/05/2022 08:51:44 - INFO - codeparrot_training - Step 36917: {'lr': 0.00043441769631714813, 'samples': 18902016, 'steps': 36917, 'loss/train': 1.477062702178955} +03/05/2022 08:51:47 - INFO - codeparrot_training - Step 36918: {'lr': 0.0004344141133709943, 'samples': 18902528, 'steps': 36918, 'loss/train': 1.8917760848999023} +03/05/2022 08:51:49 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 08:51:53 - INFO - codeparrot_training - Step 36919: {'lr': 0.00043441053034174625, 'samples': 18903040, 'steps': 36919, 'loss/train': 1.86081862449646} +03/05/2022 08:51:56 - INFO - codeparrot_training - Step 36920: {'lr': 0.00043440694722940567, 'samples': 18903552, 'steps': 36920, 'loss/train': 0.9477598667144775} +03/05/2022 08:51:58 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/05/2022 08:52:01 - INFO - codeparrot_training - Step 36921: {'lr': 0.00043440336403397417, 'samples': 18904064, 'steps': 36921, 'loss/train': 2.010540008544922} +03/05/2022 08:52:04 - INFO - codeparrot_training - Step 36922: {'lr': 0.00043439978075545337, 'samples': 18904576, 'steps': 36922, 'loss/train': 1.3981226682662964} +03/05/2022 08:52:06 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/05/2022 08:52:09 - INFO - codeparrot_training - Step 36923: {'lr': 0.0004343961973938449, 'samples': 18905088, 'steps': 36923, 'loss/train': 0.2985639274120331} +03/05/2022 08:52:13 - INFO - codeparrot_training - Step 36924: {'lr': 0.00043439261394915033, 'samples': 18905600, 'steps': 36924, 'loss/train': 1.7021766901016235} +03/05/2022 08:52:15 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/05/2022 08:52:18 - INFO - codeparrot_training - Step 36925: {'lr': 0.0004343890304213713, 'samples': 18906112, 'steps': 36925, 'loss/train': 2.1532468795776367} +03/05/2022 08:52:21 - INFO - codeparrot_training - Step 36926: {'lr': 0.0004343854468105094, 'samples': 18906624, 'steps': 36926, 'loss/train': 0.13509312272071838} +03/05/2022 08:52:23 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 08:52:26 - INFO - codeparrot_training - Step 36927: {'lr': 0.00043438186311656624, 'samples': 18907136, 'steps': 36927, 'loss/train': 1.26718270778656} +03/05/2022 08:52:29 - INFO - codeparrot_training - Step 36928: {'lr': 0.0004343782793395435, 'samples': 18907648, 'steps': 36928, 'loss/train': 1.7199846506118774} +03/05/2022 08:52:31 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/05/2022 08:52:35 - INFO - codeparrot_training - Step 36929: {'lr': 0.00043437469547944277, 'samples': 18908160, 'steps': 36929, 'loss/train': 0.8998398184776306} +03/05/2022 08:52:38 - INFO - codeparrot_training - Step 36930: {'lr': 0.0004343711115362656, 'samples': 18908672, 'steps': 36930, 'loss/train': 1.9931837320327759} +03/05/2022 08:52:40 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/05/2022 08:52:43 - INFO - codeparrot_training - Step 36931: {'lr': 0.00043436752751001365, 'samples': 18909184, 'steps': 36931, 'loss/train': 2.208766222000122} +03/05/2022 08:52:46 - INFO - codeparrot_training - Step 36932: {'lr': 0.0004343639434006885, 'samples': 18909696, 'steps': 36932, 'loss/train': 2.4634196758270264} +03/05/2022 08:52:48 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/05/2022 08:52:52 - INFO - codeparrot_training - Step 36933: {'lr': 0.00043436035920829186, 'samples': 18910208, 'steps': 36933, 'loss/train': 1.3468936681747437} +03/05/2022 08:52:55 - INFO - codeparrot_training - Step 36934: {'lr': 0.0004343567749328253, 'samples': 18910720, 'steps': 36934, 'loss/train': 1.2916488647460938} +03/05/2022 08:52:56 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/05/2022 08:53:00 - INFO - codeparrot_training - Step 36935: {'lr': 0.00043435319057429046, 'samples': 18911232, 'steps': 36935, 'loss/train': 0.9071429967880249} +03/05/2022 08:53:03 - INFO - codeparrot_training - Step 36936: {'lr': 0.0004343496061326888, 'samples': 18911744, 'steps': 36936, 'loss/train': 1.9391156435012817} +03/05/2022 08:53:05 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/05/2022 08:53:09 - INFO - codeparrot_training - Step 36937: {'lr': 0.0004343460216080221, 'samples': 18912256, 'steps': 36937, 'loss/train': 2.0201051235198975} +03/05/2022 08:53:12 - INFO - codeparrot_training - Step 36938: {'lr': 0.00043434243700029196, 'samples': 18912768, 'steps': 36938, 'loss/train': 1.5541919469833374} +03/05/2022 08:53:14 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 08:53:17 - INFO - codeparrot_training - Step 36939: {'lr': 0.0004343388523095, 'samples': 18913280, 'steps': 36939, 'loss/train': 1.6733759641647339} +03/05/2022 08:53:20 - INFO - codeparrot_training - Step 36940: {'lr': 0.00043433526753564766, 'samples': 18913792, 'steps': 36940, 'loss/train': 1.7549951076507568} +03/05/2022 08:53:22 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/05/2022 08:53:25 - INFO - codeparrot_training - Step 36941: {'lr': 0.00043433168267873677, 'samples': 18914304, 'steps': 36941, 'loss/train': 1.5112932920455933} +03/05/2022 08:53:29 - INFO - codeparrot_training - Step 36942: {'lr': 0.0004343280977387689, 'samples': 18914816, 'steps': 36942, 'loss/train': 1.7803874015808105} +03/05/2022 08:53:30 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/05/2022 08:53:34 - INFO - codeparrot_training - Step 36943: {'lr': 0.0004343245127157456, 'samples': 18915328, 'steps': 36943, 'loss/train': 1.4912039041519165} +03/05/2022 08:53:37 - INFO - codeparrot_training - Step 36944: {'lr': 0.0004343209276096686, 'samples': 18915840, 'steps': 36944, 'loss/train': 1.2793132066726685} +03/05/2022 08:53:39 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/05/2022 08:53:42 - INFO - codeparrot_training - Step 36945: {'lr': 0.00043431734242053933, 'samples': 18916352, 'steps': 36945, 'loss/train': 1.7445441484451294} +03/05/2022 08:53:46 - INFO - codeparrot_training - Step 36946: {'lr': 0.0004343137571483595, 'samples': 18916864, 'steps': 36946, 'loss/train': 1.3764079809188843} +03/05/2022 08:53:47 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/05/2022 08:53:51 - INFO - codeparrot_training - Step 36947: {'lr': 0.00043431017179313075, 'samples': 18917376, 'steps': 36947, 'loss/train': 1.7874045372009277} +03/05/2022 08:53:54 - INFO - codeparrot_training - Step 36948: {'lr': 0.0004343065863548548, 'samples': 18917888, 'steps': 36948, 'loss/train': 1.9605122804641724} +03/05/2022 08:53:55 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/05/2022 08:53:59 - INFO - codeparrot_training - Step 36949: {'lr': 0.000434303000833533, 'samples': 18918400, 'steps': 36949, 'loss/train': 1.806005597114563} +03/05/2022 08:54:02 - INFO - codeparrot_training - Step 36950: {'lr': 0.00043429941522916715, 'samples': 18918912, 'steps': 36950, 'loss/train': 1.300453543663025} +03/05/2022 08:54:04 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/05/2022 08:54:08 - INFO - codeparrot_training - Step 36951: {'lr': 0.0004342958295417588, 'samples': 18919424, 'steps': 36951, 'loss/train': 2.21132230758667} +03/05/2022 08:54:11 - INFO - codeparrot_training - Step 36952: {'lr': 0.00043429224377130964, 'samples': 18919936, 'steps': 36952, 'loss/train': 1.340383529663086} +03/05/2022 08:54:12 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/05/2022 08:54:16 - INFO - codeparrot_training - Step 36953: {'lr': 0.00043428865791782126, 'samples': 18920448, 'steps': 36953, 'loss/train': 1.6192365884780884} +03/05/2022 08:54:19 - INFO - codeparrot_training - Step 36954: {'lr': 0.0004342850719812952, 'samples': 18920960, 'steps': 36954, 'loss/train': 0.864309549331665} +03/05/2022 08:54:20 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/05/2022 08:54:25 - INFO - codeparrot_training - Step 36955: {'lr': 0.00043428148596173316, 'samples': 18921472, 'steps': 36955, 'loss/train': 1.9933805465698242} +03/05/2022 08:54:28 - INFO - codeparrot_training - Step 36956: {'lr': 0.00043427789985913675, 'samples': 18921984, 'steps': 36956, 'loss/train': 1.7541875839233398} +03/05/2022 08:54:29 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 08:54:33 - INFO - codeparrot_training - Step 36957: {'lr': 0.00043427431367350753, 'samples': 18922496, 'steps': 36957, 'loss/train': 1.5491714477539062} +03/05/2022 08:54:36 - INFO - codeparrot_training - Step 36958: {'lr': 0.0004342707274048472, 'samples': 18923008, 'steps': 36958, 'loss/train': 1.2090696096420288} +03/05/2022 08:54:38 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/05/2022 08:54:41 - INFO - codeparrot_training - Step 36959: {'lr': 0.0004342671410531572, 'samples': 18923520, 'steps': 36959, 'loss/train': 1.5083037614822388} +03/05/2022 08:54:45 - INFO - codeparrot_training - Step 36960: {'lr': 0.00043426355461843934, 'samples': 18924032, 'steps': 36960, 'loss/train': 1.7884770631790161} +03/05/2022 08:54:46 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 08:54:50 - INFO - codeparrot_training - Step 36961: {'lr': 0.00043425996810069525, 'samples': 18924544, 'steps': 36961, 'loss/train': 1.6233292818069458} +03/05/2022 08:54:53 - INFO - codeparrot_training - Step 36962: {'lr': 0.0004342563814999264, 'samples': 18925056, 'steps': 36962, 'loss/train': 2.9792420864105225} +03/05/2022 08:54:55 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 08:54:58 - INFO - codeparrot_training - Step 36963: {'lr': 0.0004342527948161344, 'samples': 18925568, 'steps': 36963, 'loss/train': 1.538663387298584} +03/05/2022 08:55:02 - INFO - codeparrot_training - Step 36964: {'lr': 0.000434249208049321, 'samples': 18926080, 'steps': 36964, 'loss/train': 2.0222537517547607} +03/05/2022 08:55:03 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/05/2022 08:55:07 - INFO - codeparrot_training - Step 36965: {'lr': 0.0004342456211994877, 'samples': 18926592, 'steps': 36965, 'loss/train': 1.4356895685195923} +03/05/2022 08:55:10 - INFO - codeparrot_training - Step 36966: {'lr': 0.00043424203426663623, 'samples': 18927104, 'steps': 36966, 'loss/train': 2.4743618965148926} +03/05/2022 08:55:11 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/05/2022 08:55:15 - INFO - codeparrot_training - Step 36967: {'lr': 0.0004342384472507681, 'samples': 18927616, 'steps': 36967, 'loss/train': 1.3574460744857788} +03/05/2022 08:55:18 - INFO - codeparrot_training - Step 36968: {'lr': 0.00043423486015188497, 'samples': 18928128, 'steps': 36968, 'loss/train': 1.7307296991348267} +03/05/2022 08:55:19 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 08:55:24 - INFO - codeparrot_training - Step 36969: {'lr': 0.00043423127296998845, 'samples': 18928640, 'steps': 36969, 'loss/train': 1.3685864210128784} +03/05/2022 08:55:27 - INFO - codeparrot_training - Step 36970: {'lr': 0.0004342276857050802, 'samples': 18929152, 'steps': 36970, 'loss/train': 2.4496963024139404} +03/05/2022 08:55:28 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/05/2022 08:55:32 - INFO - codeparrot_training - Step 36971: {'lr': 0.00043422409835716175, 'samples': 18929664, 'steps': 36971, 'loss/train': 1.1316324472427368} +03/05/2022 08:55:35 - INFO - codeparrot_training - Step 36972: {'lr': 0.00043422051092623483, 'samples': 18930176, 'steps': 36972, 'loss/train': 1.441063404083252} +03/05/2022 08:55:37 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 08:55:41 - INFO - codeparrot_training - Step 36973: {'lr': 0.0004342169234123009, 'samples': 18930688, 'steps': 36973, 'loss/train': 0.9314203858375549} +03/05/2022 08:55:44 - INFO - codeparrot_training - Step 36974: {'lr': 0.0004342133358153617, 'samples': 18931200, 'steps': 36974, 'loss/train': 0.6991536617279053} +03/05/2022 08:55:45 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/05/2022 08:55:49 - INFO - codeparrot_training - Step 36975: {'lr': 0.0004342097481354189, 'samples': 18931712, 'steps': 36975, 'loss/train': 2.3606317043304443} +03/05/2022 08:55:52 - INFO - codeparrot_training - Step 36976: {'lr': 0.00043420616037247395, 'samples': 18932224, 'steps': 36976, 'loss/train': 0.542598307132721} +03/05/2022 08:55:54 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 08:55:57 - INFO - codeparrot_training - Step 36977: {'lr': 0.0004342025725265285, 'samples': 18932736, 'steps': 36977, 'loss/train': 1.2886065244674683} +03/05/2022 08:56:01 - INFO - codeparrot_training - Step 36978: {'lr': 0.00043419898459758435, 'samples': 18933248, 'steps': 36978, 'loss/train': 1.5825302600860596} +03/05/2022 08:56:02 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/05/2022 08:56:06 - INFO - codeparrot_training - Step 36979: {'lr': 0.00043419539658564286, 'samples': 18933760, 'steps': 36979, 'loss/train': 1.5334430932998657} +03/05/2022 08:56:09 - INFO - codeparrot_training - Step 36980: {'lr': 0.0004341918084907058, 'samples': 18934272, 'steps': 36980, 'loss/train': 1.674171805381775} +03/05/2022 08:56:11 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 08:56:14 - INFO - codeparrot_training - Step 36981: {'lr': 0.0004341882203127747, 'samples': 18934784, 'steps': 36981, 'loss/train': 2.4014501571655273} +03/05/2022 08:56:17 - INFO - codeparrot_training - Step 36982: {'lr': 0.00043418463205185134, 'samples': 18935296, 'steps': 36982, 'loss/train': 1.3597460985183716} +03/05/2022 08:56:19 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/05/2022 08:56:23 - INFO - codeparrot_training - Step 36983: {'lr': 0.0004341810437079372, 'samples': 18935808, 'steps': 36983, 'loss/train': 1.4551383256912231} +03/05/2022 08:56:26 - INFO - codeparrot_training - Step 36984: {'lr': 0.0004341774552810339, 'samples': 18936320, 'steps': 36984, 'loss/train': 2.1680328845977783} +03/05/2022 08:56:27 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/05/2022 08:56:31 - INFO - codeparrot_training - Step 36985: {'lr': 0.0004341738667711431, 'samples': 18936832, 'steps': 36985, 'loss/train': 1.3458517789840698} +03/05/2022 08:56:34 - INFO - codeparrot_training - Step 36986: {'lr': 0.0004341702781782664, 'samples': 18937344, 'steps': 36986, 'loss/train': 1.6814838647842407} +03/05/2022 08:56:36 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/05/2022 08:56:40 - INFO - codeparrot_training - Step 36987: {'lr': 0.00043416668950240536, 'samples': 18937856, 'steps': 36987, 'loss/train': 0.5219226479530334} +03/05/2022 08:56:43 - INFO - codeparrot_training - Step 36988: {'lr': 0.0004341631007435617, 'samples': 18938368, 'steps': 36988, 'loss/train': 1.1871252059936523} +03/05/2022 08:56:44 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 08:56:48 - INFO - codeparrot_training - Step 36989: {'lr': 0.00043415951190173697, 'samples': 18938880, 'steps': 36989, 'loss/train': 1.1446117162704468} +03/05/2022 08:56:51 - INFO - codeparrot_training - Step 36990: {'lr': 0.00043415592297693276, 'samples': 18939392, 'steps': 36990, 'loss/train': 1.5315895080566406} +03/05/2022 08:56:53 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 08:56:57 - INFO - codeparrot_training - Step 36991: {'lr': 0.00043415233396915077, 'samples': 18939904, 'steps': 36991, 'loss/train': 1.7417967319488525} +03/05/2022 08:57:00 - INFO - codeparrot_training - Step 36992: {'lr': 0.0004341487448783926, 'samples': 18940416, 'steps': 36992, 'loss/train': 1.8231290578842163} +03/05/2022 08:57:01 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 08:57:05 - INFO - codeparrot_training - Step 36993: {'lr': 0.00043414515570465987, 'samples': 18940928, 'steps': 36993, 'loss/train': 1.130030632019043} +03/05/2022 08:57:08 - INFO - codeparrot_training - Step 36994: {'lr': 0.0004341415664479541, 'samples': 18941440, 'steps': 36994, 'loss/train': 0.88246750831604} +03/05/2022 08:57:09 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/05/2022 08:57:13 - INFO - codeparrot_training - Step 36995: {'lr': 0.00043413797710827707, 'samples': 18941952, 'steps': 36995, 'loss/train': 2.098619222640991} +03/05/2022 08:57:17 - INFO - codeparrot_training - Step 36996: {'lr': 0.00043413438768563026, 'samples': 18942464, 'steps': 36996, 'loss/train': 1.8001807928085327} +03/05/2022 08:57:18 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 08:57:22 - INFO - codeparrot_training - Step 36997: {'lr': 0.0004341307981800153, 'samples': 18942976, 'steps': 36997, 'loss/train': 1.505990982055664} +03/05/2022 08:57:25 - INFO - codeparrot_training - Step 36998: {'lr': 0.0004341272085914339, 'samples': 18943488, 'steps': 36998, 'loss/train': 1.2024465799331665} +03/05/2022 08:57:26 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/05/2022 08:57:30 - INFO - codeparrot_training - Step 36999: {'lr': 0.00043412361891988763, 'samples': 18944000, 'steps': 36999, 'loss/train': 1.759675145149231} +03/05/2022 08:57:34 - INFO - codeparrot_training - Step 37000: {'lr': 0.0004341200291653781, 'samples': 18944512, 'steps': 37000, 'loss/train': 1.2698192596435547} +03/05/2022 08:57:35 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/05/2022 08:57:39 - INFO - codeparrot_training - Step 37001: {'lr': 0.00043411643932790686, 'samples': 18945024, 'steps': 37001, 'loss/train': 1.6138778924942017} +03/05/2022 08:57:42 - INFO - codeparrot_training - Step 37002: {'lr': 0.0004341128494074756, 'samples': 18945536, 'steps': 37002, 'loss/train': 1.8181004524230957} +03/05/2022 08:57:43 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/05/2022 08:57:47 - INFO - codeparrot_training - Step 37003: {'lr': 0.00043410925940408595, 'samples': 18946048, 'steps': 37003, 'loss/train': 1.2387021780014038} +03/05/2022 08:57:50 - INFO - codeparrot_training - Step 37004: {'lr': 0.00043410566931773953, 'samples': 18946560, 'steps': 37004, 'loss/train': 6.483454704284668} +03/05/2022 08:57:51 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 08:57:56 - INFO - codeparrot_training - Step 37005: {'lr': 0.000434102079148438, 'samples': 18947072, 'steps': 37005, 'loss/train': 2.2539703845977783} +03/05/2022 08:57:59 - INFO - codeparrot_training - Step 37006: {'lr': 0.0004340984888961828, 'samples': 18947584, 'steps': 37006, 'loss/train': 1.6834895610809326} +03/05/2022 08:57:59 - INFO - codeparrot_training - Skipping example with length 917 (seq_length=1024) +03/05/2022 08:58:04 - INFO - codeparrot_training - Step 37007: {'lr': 0.00043409489856097573, 'samples': 18948096, 'steps': 37007, 'loss/train': 1.2216817140579224} +03/05/2022 08:58:07 - INFO - codeparrot_training - Step 37008: {'lr': 0.0004340913081428183, 'samples': 18948608, 'steps': 37008, 'loss/train': 2.1062488555908203} +03/05/2022 08:58:08 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 08:58:13 - INFO - codeparrot_training - Step 37009: {'lr': 0.00043408771764171216, 'samples': 18949120, 'steps': 37009, 'loss/train': 1.6886597871780396} +03/05/2022 08:58:16 - INFO - codeparrot_training - Step 37010: {'lr': 0.000434084127057659, 'samples': 18949632, 'steps': 37010, 'loss/train': 1.2066068649291992} +03/05/2022 08:58:16 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 08:58:21 - INFO - codeparrot_training - Step 37011: {'lr': 0.0004340805363906603, 'samples': 18950144, 'steps': 37011, 'loss/train': 1.5570777654647827} +03/05/2022 08:58:24 - INFO - codeparrot_training - Step 37012: {'lr': 0.00043407694564071773, 'samples': 18950656, 'steps': 37012, 'loss/train': 2.297395944595337} +03/05/2022 08:58:24 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/05/2022 08:58:29 - INFO - codeparrot_training - Step 37013: {'lr': 0.00043407335480783306, 'samples': 18951168, 'steps': 37013, 'loss/train': 1.80231511592865} +03/05/2022 08:58:32 - INFO - codeparrot_training - Step 37014: {'lr': 0.0004340697638920077, 'samples': 18951680, 'steps': 37014, 'loss/train': 1.4516441822052002} +03/05/2022 08:58:33 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/05/2022 08:58:38 - INFO - codeparrot_training - Step 37015: {'lr': 0.0004340661728932433, 'samples': 18952192, 'steps': 37015, 'loss/train': 2.000248908996582} +03/05/2022 08:58:41 - INFO - codeparrot_training - Step 37016: {'lr': 0.0004340625818115416, 'samples': 18952704, 'steps': 37016, 'loss/train': 1.497079610824585} +03/05/2022 08:58:41 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 08:58:46 - INFO - codeparrot_training - Step 37017: {'lr': 0.00043405899064690405, 'samples': 18953216, 'steps': 37017, 'loss/train': 0.7666917443275452} +03/05/2022 08:58:49 - INFO - codeparrot_training - Step 37018: {'lr': 0.0004340553993993325, 'samples': 18953728, 'steps': 37018, 'loss/train': 2.00129771232605} +03/05/2022 08:58:49 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/05/2022 08:58:55 - INFO - codeparrot_training - Step 37019: {'lr': 0.0004340518080688283, 'samples': 18954240, 'steps': 37019, 'loss/train': 1.7645634412765503} +03/05/2022 08:58:58 - INFO - codeparrot_training - Step 37020: {'lr': 0.0004340482166553932, 'samples': 18954752, 'steps': 37020, 'loss/train': 1.5914667844772339} +03/05/2022 08:58:58 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/05/2022 08:59:03 - INFO - codeparrot_training - Step 37021: {'lr': 0.0004340446251590289, 'samples': 18955264, 'steps': 37021, 'loss/train': 1.7521941661834717} +03/05/2022 08:59:06 - INFO - codeparrot_training - Step 37022: {'lr': 0.00043404103357973684, 'samples': 18955776, 'steps': 37022, 'loss/train': 1.4570995569229126} +03/05/2022 08:59:07 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/05/2022 08:59:12 - INFO - codeparrot_training - Step 37023: {'lr': 0.0004340374419175188, 'samples': 18956288, 'steps': 37023, 'loss/train': 2.462038278579712} +03/05/2022 08:59:15 - INFO - codeparrot_training - Step 37024: {'lr': 0.0004340338501723763, 'samples': 18956800, 'steps': 37024, 'loss/train': 0.6929447650909424} +03/05/2022 08:59:15 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 08:59:20 - INFO - codeparrot_training - Step 37025: {'lr': 0.00043403025834431097, 'samples': 18957312, 'steps': 37025, 'loss/train': 2.1161837577819824} +03/05/2022 08:59:23 - INFO - codeparrot_training - Step 37026: {'lr': 0.00043402666643332444, 'samples': 18957824, 'steps': 37026, 'loss/train': 1.7541412115097046} +03/05/2022 08:59:23 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 08:59:29 - INFO - codeparrot_training - Step 37027: {'lr': 0.00043402307443941835, 'samples': 18958336, 'steps': 37027, 'loss/train': 1.326864242553711} +03/05/2022 08:59:32 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/05/2022 08:59:34 - INFO - codeparrot_training - Step 37028: {'lr': 0.00043401948236259437, 'samples': 18958848, 'steps': 37028, 'loss/train': 0.7695289850234985} +03/05/2022 08:59:37 - INFO - codeparrot_training - Step 37029: {'lr': 0.000434015890202854, 'samples': 18959360, 'steps': 37029, 'loss/train': 1.2580878734588623} +03/05/2022 08:59:40 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 08:59:42 - INFO - codeparrot_training - Step 37030: {'lr': 0.0004340122979601989, 'samples': 18959872, 'steps': 37030, 'loss/train': 1.5722442865371704} +03/05/2022 08:59:46 - INFO - codeparrot_training - Step 37031: {'lr': 0.0004340087056346307, 'samples': 18960384, 'steps': 37031, 'loss/train': 1.861557126045227} +03/05/2022 08:59:48 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 08:59:51 - INFO - codeparrot_training - Step 37032: {'lr': 0.000434005113226151, 'samples': 18960896, 'steps': 37032, 'loss/train': 1.598203182220459} +03/05/2022 08:59:54 - INFO - codeparrot_training - Step 37033: {'lr': 0.0004340015207347614, 'samples': 18961408, 'steps': 37033, 'loss/train': 1.875545620918274} +03/05/2022 08:59:57 - INFO - codeparrot_training - Step 37034: {'lr': 0.0004339979281604636, 'samples': 18961920, 'steps': 37034, 'loss/train': 2.1928367614746094} +03/05/2022 08:59:57 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/05/2022 09:00:03 - INFO - codeparrot_training - Step 37035: {'lr': 0.00043399433550325917, 'samples': 18962432, 'steps': 37035, 'loss/train': 1.6792798042297363} +03/05/2022 09:00:06 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/05/2022 09:00:08 - INFO - codeparrot_training - Step 37036: {'lr': 0.00043399074276314974, 'samples': 18962944, 'steps': 37036, 'loss/train': 2.3481316566467285} +03/05/2022 09:00:11 - INFO - codeparrot_training - Step 37037: {'lr': 0.00043398714994013696, 'samples': 18963456, 'steps': 37037, 'loss/train': 4.65722131729126} +03/05/2022 09:00:14 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/05/2022 09:00:16 - INFO - codeparrot_training - Step 37038: {'lr': 0.00043398355703422233, 'samples': 18963968, 'steps': 37038, 'loss/train': 1.3179608583450317} +03/05/2022 09:00:20 - INFO - codeparrot_training - Step 37039: {'lr': 0.0004339799640454076, 'samples': 18964480, 'steps': 37039, 'loss/train': 2.0236051082611084} +03/05/2022 09:00:22 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/05/2022 09:00:25 - INFO - codeparrot_training - Step 37040: {'lr': 0.00043397637097369434, 'samples': 18964992, 'steps': 37040, 'loss/train': 6.31903076171875} +03/05/2022 09:00:28 - INFO - codeparrot_training - Step 37041: {'lr': 0.0004339727778190842, 'samples': 18965504, 'steps': 37041, 'loss/train': 0.9385872483253479} +03/05/2022 09:00:31 - INFO - codeparrot_training - Step 37042: {'lr': 0.0004339691845815786, 'samples': 18966016, 'steps': 37042, 'loss/train': 2.352531909942627} +03/05/2022 09:00:32 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 09:00:37 - INFO - codeparrot_training - Step 37043: {'lr': 0.0004339655912611795, 'samples': 18966528, 'steps': 37043, 'loss/train': 2.636232852935791} +03/05/2022 09:00:40 - INFO - codeparrot_training - Step 37044: {'lr': 0.00043396199785788824, 'samples': 18967040, 'steps': 37044, 'loss/train': 1.0448858737945557} +03/05/2022 09:00:40 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/05/2022 09:00:45 - INFO - codeparrot_training - Step 37045: {'lr': 0.00043395840437170666, 'samples': 18967552, 'steps': 37045, 'loss/train': 1.8448022603988647} +03/05/2022 09:00:48 - INFO - codeparrot_training - Step 37046: {'lr': 0.00043395481080263614, 'samples': 18968064, 'steps': 37046, 'loss/train': 2.050145149230957} +03/05/2022 09:00:48 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 09:00:53 - INFO - codeparrot_training - Step 37047: {'lr': 0.0004339512171506785, 'samples': 18968576, 'steps': 37047, 'loss/train': 1.3320791721343994} +03/05/2022 09:00:57 - INFO - codeparrot_training - Step 37048: {'lr': 0.0004339476234158352, 'samples': 18969088, 'steps': 37048, 'loss/train': 2.093526601791382} +03/05/2022 09:00:57 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 09:01:02 - INFO - codeparrot_training - Step 37049: {'lr': 0.00043394402959810795, 'samples': 18969600, 'steps': 37049, 'loss/train': 1.3402888774871826} +03/05/2022 09:01:05 - INFO - codeparrot_training - Step 37050: {'lr': 0.00043394043569749843, 'samples': 18970112, 'steps': 37050, 'loss/train': 1.6390565633773804} +03/05/2022 09:01:05 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 09:01:10 - INFO - codeparrot_training - Step 37051: {'lr': 0.00043393684171400817, 'samples': 18970624, 'steps': 37051, 'loss/train': 1.8414937257766724} +03/05/2022 09:01:13 - INFO - codeparrot_training - Step 37052: {'lr': 0.00043393324764763873, 'samples': 18971136, 'steps': 37052, 'loss/train': 1.9456350803375244} +03/05/2022 09:01:14 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/05/2022 09:01:19 - INFO - codeparrot_training - Step 37053: {'lr': 0.0004339296534983919, 'samples': 18971648, 'steps': 37053, 'loss/train': 2.578043222427368} +03/05/2022 09:01:22 - INFO - codeparrot_training - Step 37054: {'lr': 0.00043392605926626914, 'samples': 18972160, 'steps': 37054, 'loss/train': 0.452314168214798} +03/05/2022 09:01:22 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 09:01:27 - INFO - codeparrot_training - Step 37055: {'lr': 0.0004339224649512722, 'samples': 18972672, 'steps': 37055, 'loss/train': 1.5404256582260132} +03/05/2022 09:01:30 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 09:01:33 - INFO - codeparrot_training - Step 37056: {'lr': 0.00043391887055340263, 'samples': 18973184, 'steps': 37056, 'loss/train': 1.4709910154342651} +03/05/2022 09:01:36 - INFO - codeparrot_training - Step 37057: {'lr': 0.000433915276072662, 'samples': 18973696, 'steps': 37057, 'loss/train': 2.0538721084594727} +03/05/2022 09:01:39 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 09:01:41 - INFO - codeparrot_training - Step 37058: {'lr': 0.00043391168150905203, 'samples': 18974208, 'steps': 37058, 'loss/train': 0.6300174593925476} +03/05/2022 09:01:44 - INFO - codeparrot_training - Step 37059: {'lr': 0.0004339080868625743, 'samples': 18974720, 'steps': 37059, 'loss/train': 1.4279366731643677} +03/05/2022 09:01:48 - INFO - codeparrot_training - Step 37060: {'lr': 0.0004339044921332304, 'samples': 18975232, 'steps': 37060, 'loss/train': 1.7105212211608887} +03/05/2022 09:01:48 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/05/2022 09:01:53 - INFO - codeparrot_training - Step 37061: {'lr': 0.000433900897321022, 'samples': 18975744, 'steps': 37061, 'loss/train': 0.9826271533966064} +03/05/2022 09:01:56 - INFO - codeparrot_training - Step 37062: {'lr': 0.0004338973024259506, 'samples': 18976256, 'steps': 37062, 'loss/train': 1.8923547267913818} +03/05/2022 09:01:56 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/05/2022 09:02:01 - INFO - codeparrot_training - Step 37063: {'lr': 0.00043389370744801806, 'samples': 18976768, 'steps': 37063, 'loss/train': 2.0814192295074463} +03/05/2022 09:02:04 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/05/2022 09:02:07 - INFO - codeparrot_training - Step 37064: {'lr': 0.00043389011238722575, 'samples': 18977280, 'steps': 37064, 'loss/train': 1.6772959232330322} +03/05/2022 09:02:10 - INFO - codeparrot_training - Step 37065: {'lr': 0.0004338865172435754, 'samples': 18977792, 'steps': 37065, 'loss/train': 2.094710111618042} +03/05/2022 09:02:13 - INFO - codeparrot_training - Step 37066: {'lr': 0.00043388292201706867, 'samples': 18978304, 'steps': 37066, 'loss/train': 1.1918777227401733} +03/05/2022 09:02:14 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/05/2022 09:02:18 - INFO - codeparrot_training - Step 37067: {'lr': 0.0004338793267077071, 'samples': 18978816, 'steps': 37067, 'loss/train': 1.4953997135162354} +03/05/2022 09:02:22 - INFO - codeparrot_training - Step 37068: {'lr': 0.0004338757313154923, 'samples': 18979328, 'steps': 37068, 'loss/train': 1.210745096206665} +03/05/2022 09:02:22 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/05/2022 09:02:27 - INFO - codeparrot_training - Step 37069: {'lr': 0.000433872135840426, 'samples': 18979840, 'steps': 37069, 'loss/train': 1.6313426494598389} +03/05/2022 09:02:30 - INFO - codeparrot_training - Step 37070: {'lr': 0.00043386854028250977, 'samples': 18980352, 'steps': 37070, 'loss/train': 1.7846847772598267} +03/05/2022 09:02:31 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/05/2022 09:02:35 - INFO - codeparrot_training - Step 37071: {'lr': 0.00043386494464174515, 'samples': 18980864, 'steps': 37071, 'loss/train': 1.1558401584625244} +03/05/2022 09:02:39 - INFO - codeparrot_training - Step 37072: {'lr': 0.0004338613489181338, 'samples': 18981376, 'steps': 37072, 'loss/train': 2.019453763961792} +03/05/2022 09:02:39 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/05/2022 09:02:44 - INFO - codeparrot_training - Step 37073: {'lr': 0.00043385775311167746, 'samples': 18981888, 'steps': 37073, 'loss/train': 1.5930427312850952} +03/05/2022 09:02:47 - INFO - codeparrot_training - Step 37074: {'lr': 0.00043385415722237765, 'samples': 18982400, 'steps': 37074, 'loss/train': 2.278592824935913} +03/05/2022 09:02:48 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/05/2022 09:02:52 - INFO - codeparrot_training - Step 37075: {'lr': 0.0004338505612502359, 'samples': 18982912, 'steps': 37075, 'loss/train': 1.1396745443344116} +03/05/2022 09:02:55 - INFO - codeparrot_training - Step 37076: {'lr': 0.000433846965195254, 'samples': 18983424, 'steps': 37076, 'loss/train': 1.8427760601043701} +03/05/2022 09:02:56 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/05/2022 09:03:01 - INFO - codeparrot_training - Step 37077: {'lr': 0.00043384336905743343, 'samples': 18983936, 'steps': 37077, 'loss/train': 1.382001280784607} +03/05/2022 09:03:04 - INFO - codeparrot_training - Step 37078: {'lr': 0.0004338397728367759, 'samples': 18984448, 'steps': 37078, 'loss/train': 0.9638236165046692} +03/05/2022 09:03:05 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/05/2022 09:03:09 - INFO - codeparrot_training - Step 37079: {'lr': 0.000433836176533283, 'samples': 18984960, 'steps': 37079, 'loss/train': 2.134312868118286} +03/05/2022 09:03:12 - INFO - codeparrot_training - Step 37080: {'lr': 0.0004338325801469564, 'samples': 18985472, 'steps': 37080, 'loss/train': 1.6404387950897217} +03/05/2022 09:03:13 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/05/2022 09:03:17 - INFO - codeparrot_training - Step 37081: {'lr': 0.00043382898367779767, 'samples': 18985984, 'steps': 37081, 'loss/train': 2.837474822998047} +03/05/2022 09:03:21 - INFO - codeparrot_training - Step 37082: {'lr': 0.00043382538712580845, 'samples': 18986496, 'steps': 37082, 'loss/train': 1.774423360824585} +03/05/2022 09:03:21 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/05/2022 09:03:26 - INFO - codeparrot_training - Step 37083: {'lr': 0.00043382179049099024, 'samples': 18987008, 'steps': 37083, 'loss/train': 2.0665252208709717} +03/05/2022 09:03:29 - INFO - codeparrot_training - Step 37084: {'lr': 0.00043381819377334485, 'samples': 18987520, 'steps': 37084, 'loss/train': 1.8939987421035767} +03/05/2022 09:03:30 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/05/2022 09:03:34 - INFO - codeparrot_training - Step 37085: {'lr': 0.00043381459697287383, 'samples': 18988032, 'steps': 37085, 'loss/train': 1.8019187450408936} +03/05/2022 09:03:37 - INFO - codeparrot_training - Step 37086: {'lr': 0.0004338110000895787, 'samples': 18988544, 'steps': 37086, 'loss/train': 1.7120757102966309} +03/05/2022 09:03:38 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/05/2022 09:03:43 - INFO - codeparrot_training - Step 37087: {'lr': 0.00043380740312346135, 'samples': 18989056, 'steps': 37087, 'loss/train': 2.1594691276550293} +03/05/2022 09:03:46 - INFO - codeparrot_training - Step 37088: {'lr': 0.00043380380607452307, 'samples': 18989568, 'steps': 37088, 'loss/train': 1.8559999465942383} +03/05/2022 09:03:46 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/05/2022 09:03:51 - INFO - codeparrot_training - Step 37089: {'lr': 0.0004338002089427657, 'samples': 18990080, 'steps': 37089, 'loss/train': 1.9000734090805054} +03/05/2022 09:03:54 - INFO - codeparrot_training - Step 37090: {'lr': 0.00043379661172819075, 'samples': 18990592, 'steps': 37090, 'loss/train': 1.9134142398834229} +03/05/2022 09:03:55 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/05/2022 09:04:00 - INFO - codeparrot_training - Step 37091: {'lr': 0.0004337930144307999, 'samples': 18991104, 'steps': 37091, 'loss/train': 1.127015471458435} +03/05/2022 09:04:03 - INFO - codeparrot_training - Step 37092: {'lr': 0.0004337894170505947, 'samples': 18991616, 'steps': 37092, 'loss/train': 1.6810263395309448} +03/05/2022 09:04:04 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/05/2022 09:04:08 - INFO - codeparrot_training - Step 37093: {'lr': 0.0004337858195875769, 'samples': 18992128, 'steps': 37093, 'loss/train': 1.7631961107254028} +03/05/2022 09:04:11 - INFO - codeparrot_training - Step 37094: {'lr': 0.00043378222204174807, 'samples': 18992640, 'steps': 37094, 'loss/train': 1.349923014640808} +03/05/2022 09:04:13 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 09:04:17 - INFO - codeparrot_training - Step 37095: {'lr': 0.0004337786244131097, 'samples': 18993152, 'steps': 37095, 'loss/train': 2.4047088623046875} +03/05/2022 09:04:20 - INFO - codeparrot_training - Step 37096: {'lr': 0.00043377502670166357, 'samples': 18993664, 'steps': 37096, 'loss/train': 1.925308346748352} +03/05/2022 09:04:21 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/05/2022 09:04:25 - INFO - codeparrot_training - Step 37097: {'lr': 0.0004337714289074113, 'samples': 18994176, 'steps': 37097, 'loss/train': 1.716721773147583} +03/05/2022 09:04:28 - INFO - codeparrot_training - Step 37098: {'lr': 0.0004337678310303544, 'samples': 18994688, 'steps': 37098, 'loss/train': 1.7418791055679321} +03/05/2022 09:04:30 - INFO - codeparrot_training - Skipping example with length 285 (seq_length=1024) +03/05/2022 09:04:34 - INFO - codeparrot_training - Step 37099: {'lr': 0.00043376423307049455, 'samples': 18995200, 'steps': 37099, 'loss/train': 1.6615022420883179} +03/05/2022 09:04:37 - INFO - codeparrot_training - Step 37100: {'lr': 0.00043376063502783337, 'samples': 18995712, 'steps': 37100, 'loss/train': 1.8112053871154785} +03/05/2022 09:04:38 - INFO - codeparrot_training - Skipping example with length 960 (seq_length=1024) +03/05/2022 09:04:42 - INFO - codeparrot_training - Step 37101: {'lr': 0.00043375703690237254, 'samples': 18996224, 'steps': 37101, 'loss/train': 1.8027398586273193} +03/05/2022 09:04:45 - INFO - codeparrot_training - Step 37102: {'lr': 0.0004337534386941135, 'samples': 18996736, 'steps': 37102, 'loss/train': 1.7639185190200806} +03/05/2022 09:04:46 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 09:04:50 - INFO - codeparrot_training - Step 37103: {'lr': 0.00043374984040305816, 'samples': 18997248, 'steps': 37103, 'loss/train': 1.823492169380188} +03/05/2022 09:04:54 - INFO - codeparrot_training - Step 37104: {'lr': 0.00043374624202920786, 'samples': 18997760, 'steps': 37104, 'loss/train': 1.4477523565292358} +03/05/2022 09:04:55 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/05/2022 09:04:59 - INFO - codeparrot_training - Step 37105: {'lr': 0.0004337426435725644, 'samples': 18998272, 'steps': 37105, 'loss/train': 0.8837972283363342} +03/05/2022 09:05:02 - INFO - codeparrot_training - Step 37106: {'lr': 0.00043373904503312934, 'samples': 18998784, 'steps': 37106, 'loss/train': 1.530753254890442} +03/05/2022 09:05:03 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/05/2022 09:05:07 - INFO - codeparrot_training - Step 37107: {'lr': 0.0004337354464109042, 'samples': 18999296, 'steps': 37107, 'loss/train': 1.6046457290649414} +03/05/2022 09:05:11 - INFO - codeparrot_training - Step 37108: {'lr': 0.0004337318477058908, 'samples': 18999808, 'steps': 37108, 'loss/train': 1.0146933794021606} +03/05/2022 09:05:12 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 09:05:16 - INFO - codeparrot_training - Step 37109: {'lr': 0.0004337282489180907, 'samples': 19000320, 'steps': 37109, 'loss/train': 2.0390138626098633} +03/05/2022 09:05:19 - INFO - codeparrot_training - Step 37110: {'lr': 0.0004337246500475054, 'samples': 19000832, 'steps': 37110, 'loss/train': 1.3324987888336182} +03/05/2022 09:05:20 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/05/2022 09:05:24 - INFO - codeparrot_training - Step 37111: {'lr': 0.0004337210510941366, 'samples': 19001344, 'steps': 37111, 'loss/train': 1.435253620147705} +03/05/2022 09:05:27 - INFO - codeparrot_training - Step 37112: {'lr': 0.000433717452057986, 'samples': 19001856, 'steps': 37112, 'loss/train': 1.1810600757598877} +03/05/2022 09:05:29 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 09:05:33 - INFO - codeparrot_training - Step 37113: {'lr': 0.00043371385293905517, 'samples': 19002368, 'steps': 37113, 'loss/train': 5.195160865783691} +03/05/2022 09:05:36 - INFO - codeparrot_training - Step 37114: {'lr': 0.0004337102537373456, 'samples': 19002880, 'steps': 37114, 'loss/train': 1.6097990274429321} +03/05/2022 09:05:38 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 09:05:41 - INFO - codeparrot_training - Step 37115: {'lr': 0.0004337066544528591, 'samples': 19003392, 'steps': 37115, 'loss/train': 1.4061896800994873} +03/05/2022 09:05:44 - INFO - codeparrot_training - Step 37116: {'lr': 0.00043370305508559723, 'samples': 19003904, 'steps': 37116, 'loss/train': 4.303162097930908} +03/05/2022 09:05:46 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/05/2022 09:05:50 - INFO - codeparrot_training - Step 37117: {'lr': 0.00043369945563556157, 'samples': 19004416, 'steps': 37117, 'loss/train': 1.7049157619476318} +03/05/2022 09:05:53 - INFO - codeparrot_training - Step 37118: {'lr': 0.00043369585610275374, 'samples': 19004928, 'steps': 37118, 'loss/train': 2.2402255535125732} +03/05/2022 09:05:54 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 09:05:58 - INFO - codeparrot_training - Step 37119: {'lr': 0.0004336922564871755, 'samples': 19005440, 'steps': 37119, 'loss/train': 2.4325311183929443} +03/05/2022 09:06:01 - INFO - codeparrot_training - Step 37120: {'lr': 0.00043368865678882824, 'samples': 19005952, 'steps': 37120, 'loss/train': 1.8013064861297607} +03/05/2022 09:06:03 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 09:06:06 - INFO - codeparrot_training - Step 37121: {'lr': 0.00043368505700771377, 'samples': 19006464, 'steps': 37121, 'loss/train': 2.6308374404907227} +03/05/2022 09:06:10 - INFO - codeparrot_training - Step 37122: {'lr': 0.00043368145714383364, 'samples': 19006976, 'steps': 37122, 'loss/train': 1.3473470211029053} +03/05/2022 09:06:11 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/05/2022 09:06:15 - INFO - codeparrot_training - Step 37123: {'lr': 0.00043367785719718947, 'samples': 19007488, 'steps': 37123, 'loss/train': 2.3524458408355713} +03/05/2022 09:06:18 - INFO - codeparrot_training - Step 37124: {'lr': 0.0004336742571677829, 'samples': 19008000, 'steps': 37124, 'loss/train': 1.8188061714172363} +03/05/2022 09:06:20 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 09:06:23 - INFO - codeparrot_training - Step 37125: {'lr': 0.00043367065705561547, 'samples': 19008512, 'steps': 37125, 'loss/train': 2.0739035606384277} +03/05/2022 09:06:27 - INFO - codeparrot_training - Step 37126: {'lr': 0.00043366705686068895, 'samples': 19009024, 'steps': 37126, 'loss/train': 1.7980880737304688} +03/05/2022 09:06:28 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/05/2022 09:06:32 - INFO - codeparrot_training - Step 37127: {'lr': 0.0004336634565830049, 'samples': 19009536, 'steps': 37127, 'loss/train': 1.8266270160675049} +03/05/2022 09:06:35 - INFO - codeparrot_training - Step 37128: {'lr': 0.0004336598562225649, 'samples': 19010048, 'steps': 37128, 'loss/train': 1.9881590604782104} +03/05/2022 09:06:36 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 09:06:40 - INFO - codeparrot_training - Step 37129: {'lr': 0.00043365625577937065, 'samples': 19010560, 'steps': 37129, 'loss/train': 1.5430338382720947} +03/05/2022 09:06:43 - INFO - codeparrot_training - Step 37130: {'lr': 0.00043365265525342365, 'samples': 19011072, 'steps': 37130, 'loss/train': 1.7834327220916748} +03/05/2022 09:06:45 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/05/2022 09:06:49 - INFO - codeparrot_training - Step 37131: {'lr': 0.00043364905464472563, 'samples': 19011584, 'steps': 37131, 'loss/train': 2.231950044631958} +03/05/2022 09:06:52 - INFO - codeparrot_training - Step 37132: {'lr': 0.0004336454539532782, 'samples': 19012096, 'steps': 37132, 'loss/train': 0.37439656257629395} +03/05/2022 09:06:54 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/05/2022 09:06:57 - INFO - codeparrot_training - Step 37133: {'lr': 0.00043364185317908296, 'samples': 19012608, 'steps': 37133, 'loss/train': 1.3625109195709229} +03/05/2022 09:07:00 - INFO - codeparrot_training - Step 37134: {'lr': 0.0004336382523221415, 'samples': 19013120, 'steps': 37134, 'loss/train': 2.1972014904022217} +03/05/2022 09:07:02 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 09:07:06 - INFO - codeparrot_training - Step 37135: {'lr': 0.0004336346513824555, 'samples': 19013632, 'steps': 37135, 'loss/train': 1.7125988006591797} +03/05/2022 09:07:09 - INFO - codeparrot_training - Step 37136: {'lr': 0.0004336310503600266, 'samples': 19014144, 'steps': 37136, 'loss/train': 1.155924677848816} +03/05/2022 09:07:11 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 09:07:14 - INFO - codeparrot_training - Step 37137: {'lr': 0.0004336274492548563, 'samples': 19014656, 'steps': 37137, 'loss/train': 1.2122522592544556} +03/05/2022 09:07:17 - INFO - codeparrot_training - Step 37138: {'lr': 0.0004336238480669463, 'samples': 19015168, 'steps': 37138, 'loss/train': 3.2225561141967773} +03/05/2022 09:07:20 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 09:07:23 - INFO - codeparrot_training - Step 37139: {'lr': 0.0004336202467962983, 'samples': 19015680, 'steps': 37139, 'loss/train': 0.8786723613739014} +03/05/2022 09:07:26 - INFO - codeparrot_training - Step 37140: {'lr': 0.0004336166454429139, 'samples': 19016192, 'steps': 37140, 'loss/train': 0.8320361375808716} +03/05/2022 09:07:28 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 09:07:31 - INFO - codeparrot_training - Step 37141: {'lr': 0.0004336130440067946, 'samples': 19016704, 'steps': 37141, 'loss/train': 1.0584031343460083} +03/05/2022 09:07:34 - INFO - codeparrot_training - Step 37142: {'lr': 0.000433609442487942, 'samples': 19017216, 'steps': 37142, 'loss/train': 1.1211755275726318} +03/05/2022 09:07:36 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/05/2022 09:07:39 - INFO - codeparrot_training - Step 37143: {'lr': 0.00043360584088635804, 'samples': 19017728, 'steps': 37143, 'loss/train': 2.1521003246307373} +03/05/2022 09:07:43 - INFO - codeparrot_training - Step 37144: {'lr': 0.0004336022392020439, 'samples': 19018240, 'steps': 37144, 'loss/train': 1.4619868993759155} +03/05/2022 09:07:44 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 09:07:48 - INFO - codeparrot_training - Step 37145: {'lr': 0.0004335986374350015, 'samples': 19018752, 'steps': 37145, 'loss/train': 0.9908483028411865} +03/05/2022 09:07:51 - INFO - codeparrot_training - Step 37146: {'lr': 0.00043359503558523246, 'samples': 19019264, 'steps': 37146, 'loss/train': 2.0730772018432617} +03/05/2022 09:07:53 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/05/2022 09:07:56 - INFO - codeparrot_training - Step 37147: {'lr': 0.0004335914336527382, 'samples': 19019776, 'steps': 37147, 'loss/train': 1.8659542798995972} +03/05/2022 09:07:59 - INFO - codeparrot_training - Step 37148: {'lr': 0.0004335878316375206, 'samples': 19020288, 'steps': 37148, 'loss/train': 1.8043835163116455} +03/05/2022 09:08:01 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/05/2022 09:08:05 - INFO - codeparrot_training - Step 37149: {'lr': 0.0004335842295395811, 'samples': 19020800, 'steps': 37149, 'loss/train': 1.260195016860962} +03/05/2022 09:08:08 - INFO - codeparrot_training - Step 37150: {'lr': 0.0004335806273589214, 'samples': 19021312, 'steps': 37150, 'loss/train': 1.5253543853759766} +03/05/2022 09:08:10 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/05/2022 09:08:13 - INFO - codeparrot_training - Step 37151: {'lr': 0.0004335770250955431, 'samples': 19021824, 'steps': 37151, 'loss/train': 1.5623302459716797} +03/05/2022 09:08:16 - INFO - codeparrot_training - Step 37152: {'lr': 0.0004335734227494478, 'samples': 19022336, 'steps': 37152, 'loss/train': 1.019531011581421} +03/05/2022 09:08:18 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/05/2022 09:08:22 - INFO - codeparrot_training - Step 37153: {'lr': 0.0004335698203206372, 'samples': 19022848, 'steps': 37153, 'loss/train': 1.488417148590088} +03/05/2022 09:08:25 - INFO - codeparrot_training - Step 37154: {'lr': 0.00043356621780911273, 'samples': 19023360, 'steps': 37154, 'loss/train': 2.2847955226898193} +03/05/2022 09:08:27 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/05/2022 09:08:30 - INFO - codeparrot_training - Step 37155: {'lr': 0.0004335626152148763, 'samples': 19023872, 'steps': 37155, 'loss/train': 1.5035854578018188} +03/05/2022 09:08:33 - INFO - codeparrot_training - Step 37156: {'lr': 0.0004335590125379293, 'samples': 19024384, 'steps': 37156, 'loss/train': 2.558440923690796} +03/05/2022 09:08:36 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/05/2022 09:08:39 - INFO - codeparrot_training - Step 37157: {'lr': 0.00043355540977827356, 'samples': 19024896, 'steps': 37157, 'loss/train': 2.4427340030670166} +03/05/2022 09:08:42 - INFO - codeparrot_training - Step 37158: {'lr': 0.0004335518069359105, 'samples': 19025408, 'steps': 37158, 'loss/train': 1.1898554563522339} +03/05/2022 09:08:44 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 09:08:47 - INFO - codeparrot_training - Step 37159: {'lr': 0.0004335482040108418, 'samples': 19025920, 'steps': 37159, 'loss/train': 1.855122447013855} +03/05/2022 09:08:50 - INFO - codeparrot_training - Step 37160: {'lr': 0.00043354460100306915, 'samples': 19026432, 'steps': 37160, 'loss/train': 0.7699187994003296} +03/05/2022 09:08:52 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/05/2022 09:08:55 - INFO - codeparrot_training - Step 37161: {'lr': 0.00043354099791259414, 'samples': 19026944, 'steps': 37161, 'loss/train': 1.342292070388794} +03/05/2022 09:08:59 - INFO - codeparrot_training - Step 37162: {'lr': 0.00043353739473941846, 'samples': 19027456, 'steps': 37162, 'loss/train': 1.8867727518081665} +03/05/2022 09:09:01 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 09:09:04 - INFO - codeparrot_training - Step 37163: {'lr': 0.0004335337914835435, 'samples': 19027968, 'steps': 37163, 'loss/train': 1.7273555994033813} +03/05/2022 09:09:07 - INFO - codeparrot_training - Step 37164: {'lr': 0.0004335301881449711, 'samples': 19028480, 'steps': 37164, 'loss/train': 2.3171653747558594} +03/05/2022 09:09:09 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/05/2022 09:09:12 - INFO - codeparrot_training - Step 37165: {'lr': 0.00043352658472370294, 'samples': 19028992, 'steps': 37165, 'loss/train': 1.138758897781372} +03/05/2022 09:09:15 - INFO - codeparrot_training - Step 37166: {'lr': 0.00043352298121974043, 'samples': 19029504, 'steps': 37166, 'loss/train': 2.079317331314087} +03/05/2022 09:09:17 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 09:09:21 - INFO - codeparrot_training - Step 37167: {'lr': 0.00043351937763308533, 'samples': 19030016, 'steps': 37167, 'loss/train': 1.6257760524749756} +03/05/2022 09:09:24 - INFO - codeparrot_training - Step 37168: {'lr': 0.0004335157739637392, 'samples': 19030528, 'steps': 37168, 'loss/train': 1.5245778560638428} +03/05/2022 09:09:26 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 09:09:29 - INFO - codeparrot_training - Step 37169: {'lr': 0.0004335121702117038, 'samples': 19031040, 'steps': 37169, 'loss/train': 1.6117690801620483} +03/05/2022 09:09:32 - INFO - codeparrot_training - Step 37170: {'lr': 0.0004335085663769805, 'samples': 19031552, 'steps': 37170, 'loss/train': 1.4633811712265015} +03/05/2022 09:09:35 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/05/2022 09:09:38 - INFO - codeparrot_training - Step 37171: {'lr': 0.00043350496245957116, 'samples': 19032064, 'steps': 37171, 'loss/train': 1.8071073293685913} +03/05/2022 09:09:41 - INFO - codeparrot_training - Step 37172: {'lr': 0.00043350135845947725, 'samples': 19032576, 'steps': 37172, 'loss/train': 2.6616435050964355} +03/05/2022 09:09:44 - INFO - codeparrot_training - Step 37173: {'lr': 0.00043349775437670046, 'samples': 19033088, 'steps': 37173, 'loss/train': 1.2873822450637817} +03/05/2022 09:09:44 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 09:09:49 - INFO - codeparrot_training - Step 37174: {'lr': 0.0004334941502112425, 'samples': 19033600, 'steps': 37174, 'loss/train': 1.9465914964675903} +03/05/2022 09:09:53 - INFO - codeparrot_training - Step 37175: {'lr': 0.0004334905459631049, 'samples': 19034112, 'steps': 37175, 'loss/train': 1.5150432586669922} +03/05/2022 09:09:53 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/05/2022 09:09:58 - INFO - codeparrot_training - Step 37176: {'lr': 0.0004334869416322892, 'samples': 19034624, 'steps': 37176, 'loss/train': 0.9372521042823792} +03/05/2022 09:10:01 - INFO - codeparrot_training - Step 37177: {'lr': 0.0004334833372187972, 'samples': 19035136, 'steps': 37177, 'loss/train': 3.279226779937744} +03/05/2022 09:10:01 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 09:10:06 - INFO - codeparrot_training - Step 37178: {'lr': 0.0004334797327226304, 'samples': 19035648, 'steps': 37178, 'loss/train': 0.5276676416397095} +03/05/2022 09:10:09 - INFO - codeparrot_training - Step 37179: {'lr': 0.00043347612814379047, 'samples': 19036160, 'steps': 37179, 'loss/train': 1.4646509885787964} +03/05/2022 09:10:09 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/05/2022 09:10:15 - INFO - codeparrot_training - Step 37180: {'lr': 0.000433472523482279, 'samples': 19036672, 'steps': 37180, 'loss/train': 1.7994753122329712} +03/05/2022 09:10:17 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/05/2022 09:10:20 - INFO - codeparrot_training - Step 37181: {'lr': 0.0004334689187380977, 'samples': 19037184, 'steps': 37181, 'loss/train': 2.0101513862609863} +03/05/2022 09:10:23 - INFO - codeparrot_training - Step 37182: {'lr': 0.0004334653139112481, 'samples': 19037696, 'steps': 37182, 'loss/train': 1.8706097602844238} +03/05/2022 09:10:26 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/05/2022 09:10:28 - INFO - codeparrot_training - Step 37183: {'lr': 0.0004334617090017319, 'samples': 19038208, 'steps': 37183, 'loss/train': 0.9321692585945129} +03/05/2022 09:10:32 - INFO - codeparrot_training - Step 37184: {'lr': 0.0004334581040095506, 'samples': 19038720, 'steps': 37184, 'loss/train': 1.9465187788009644} +03/05/2022 09:10:34 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/05/2022 09:10:37 - INFO - codeparrot_training - Step 37185: {'lr': 0.00043345449893470594, 'samples': 19039232, 'steps': 37185, 'loss/train': 1.8337160348892212} +03/05/2022 09:10:40 - INFO - codeparrot_training - Step 37186: {'lr': 0.00043345089377719954, 'samples': 19039744, 'steps': 37186, 'loss/train': 1.20099675655365} +03/05/2022 09:10:43 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/05/2022 09:10:45 - INFO - codeparrot_training - Step 37187: {'lr': 0.00043344728853703297, 'samples': 19040256, 'steps': 37187, 'loss/train': 1.42197585105896} +03/05/2022 09:10:48 - INFO - codeparrot_training - Step 37188: {'lr': 0.0004334436832142079, 'samples': 19040768, 'steps': 37188, 'loss/train': 1.7639576196670532} +03/05/2022 09:10:51 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 09:10:54 - INFO - codeparrot_training - Step 37189: {'lr': 0.000433440077808726, 'samples': 19041280, 'steps': 37189, 'loss/train': 1.499832272529602} +03/05/2022 09:10:57 - INFO - codeparrot_training - Step 37190: {'lr': 0.00043343647232058877, 'samples': 19041792, 'steps': 37190, 'loss/train': 1.8163385391235352} +03/05/2022 09:10:59 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 09:11:02 - INFO - codeparrot_training - Step 37191: {'lr': 0.0004334328667497979, 'samples': 19042304, 'steps': 37191, 'loss/train': 1.8642280101776123} +03/05/2022 09:11:05 - INFO - codeparrot_training - Step 37192: {'lr': 0.00043342926109635497, 'samples': 19042816, 'steps': 37192, 'loss/train': 1.150327444076538} +03/05/2022 09:11:08 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/05/2022 09:11:11 - INFO - codeparrot_training - Step 37193: {'lr': 0.0004334256553602617, 'samples': 19043328, 'steps': 37193, 'loss/train': 1.3464022874832153} +03/05/2022 09:11:14 - INFO - codeparrot_training - Step 37194: {'lr': 0.00043342204954151963, 'samples': 19043840, 'steps': 37194, 'loss/train': 0.9517424702644348} +03/05/2022 09:11:16 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/05/2022 09:11:19 - INFO - codeparrot_training - Step 37195: {'lr': 0.00043341844364013047, 'samples': 19044352, 'steps': 37195, 'loss/train': 1.779246211051941} +03/05/2022 09:11:22 - INFO - codeparrot_training - Step 37196: {'lr': 0.00043341483765609566, 'samples': 19044864, 'steps': 37196, 'loss/train': 1.291870355606079} +03/05/2022 09:11:24 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 09:11:27 - INFO - codeparrot_training - Step 37197: {'lr': 0.0004334112315894171, 'samples': 19045376, 'steps': 37197, 'loss/train': 2.047727108001709} +03/05/2022 09:11:31 - INFO - codeparrot_training - Step 37198: {'lr': 0.00043340762544009627, 'samples': 19045888, 'steps': 37198, 'loss/train': 1.1536909341812134} +03/05/2022 09:11:33 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 09:11:36 - INFO - codeparrot_training - Step 37199: {'lr': 0.0004334040192081347, 'samples': 19046400, 'steps': 37199, 'loss/train': 1.740929126739502} +03/05/2022 09:11:39 - INFO - codeparrot_training - Step 37200: {'lr': 0.00043340041289353416, 'samples': 19046912, 'steps': 37200, 'loss/train': 2.8420591354370117} +03/05/2022 09:11:41 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 09:11:44 - INFO - codeparrot_training - Step 37201: {'lr': 0.0004333968064962962, 'samples': 19047424, 'steps': 37201, 'loss/train': 1.2579494714736938} +03/05/2022 09:11:47 - INFO - codeparrot_training - Step 37202: {'lr': 0.00043339320001642244, 'samples': 19047936, 'steps': 37202, 'loss/train': 0.5379994511604309} +03/05/2022 09:11:49 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/05/2022 09:11:53 - INFO - codeparrot_training - Step 37203: {'lr': 0.0004333895934539146, 'samples': 19048448, 'steps': 37203, 'loss/train': 2.1487693786621094} +03/05/2022 09:11:56 - INFO - codeparrot_training - Step 37204: {'lr': 0.00043338598680877423, 'samples': 19048960, 'steps': 37204, 'loss/train': 2.5150232315063477} +03/05/2022 09:11:58 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 09:12:01 - INFO - codeparrot_training - Step 37205: {'lr': 0.00043338238008100297, 'samples': 19049472, 'steps': 37205, 'loss/train': 1.4111640453338623} +03/05/2022 09:12:04 - INFO - codeparrot_training - Step 37206: {'lr': 0.0004333787732706024, 'samples': 19049984, 'steps': 37206, 'loss/train': 0.6248279809951782} +03/05/2022 09:12:06 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/05/2022 09:12:10 - INFO - codeparrot_training - Step 37207: {'lr': 0.00043337516637757416, 'samples': 19050496, 'steps': 37207, 'loss/train': 1.9875731468200684} +03/05/2022 09:12:13 - INFO - codeparrot_training - Step 37208: {'lr': 0.00043337155940191996, 'samples': 19051008, 'steps': 37208, 'loss/train': 1.2876697778701782} +03/05/2022 09:12:15 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 09:12:18 - INFO - codeparrot_training - Step 37209: {'lr': 0.0004333679523436413, 'samples': 19051520, 'steps': 37209, 'loss/train': 1.8852626085281372} +03/05/2022 09:12:21 - INFO - codeparrot_training - Step 37210: {'lr': 0.0004333643452027399, 'samples': 19052032, 'steps': 37210, 'loss/train': 1.2152680158615112} +03/05/2022 09:12:24 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/05/2022 09:12:27 - INFO - codeparrot_training - Step 37211: {'lr': 0.00043336073797921743, 'samples': 19052544, 'steps': 37211, 'loss/train': 2.0774142742156982} +03/05/2022 09:12:30 - INFO - codeparrot_training - Step 37212: {'lr': 0.0004333571306730754, 'samples': 19053056, 'steps': 37212, 'loss/train': 1.1418336629867554} +03/05/2022 09:12:32 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/05/2022 09:12:35 - INFO - codeparrot_training - Step 37213: {'lr': 0.00043335352328431544, 'samples': 19053568, 'steps': 37213, 'loss/train': 2.3217196464538574} +03/05/2022 09:12:38 - INFO - codeparrot_training - Step 37214: {'lr': 0.00043334991581293924, 'samples': 19054080, 'steps': 37214, 'loss/train': 1.5781137943267822} +03/05/2022 09:12:40 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 09:12:44 - INFO - codeparrot_training - Step 37215: {'lr': 0.0004333463082589484, 'samples': 19054592, 'steps': 37215, 'loss/train': 0.8202218413352966} +03/05/2022 09:12:47 - INFO - codeparrot_training - Step 37216: {'lr': 0.0004333427006223445, 'samples': 19055104, 'steps': 37216, 'loss/train': 2.125861883163452} +03/05/2022 09:12:49 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/05/2022 09:12:52 - INFO - codeparrot_training - Step 37217: {'lr': 0.00043333909290312923, 'samples': 19055616, 'steps': 37217, 'loss/train': 2.398944139480591} +03/05/2022 09:12:55 - INFO - codeparrot_training - Step 37218: {'lr': 0.00043333548510130426, 'samples': 19056128, 'steps': 37218, 'loss/train': 1.9081380367279053} +03/05/2022 09:12:58 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 09:13:00 - INFO - codeparrot_training - Step 37219: {'lr': 0.00043333187721687104, 'samples': 19056640, 'steps': 37219, 'loss/train': 1.366235613822937} +03/05/2022 09:13:04 - INFO - codeparrot_training - Step 37220: {'lr': 0.0004333282692498314, 'samples': 19057152, 'steps': 37220, 'loss/train': 1.055216908454895} +03/05/2022 09:13:06 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 09:13:09 - INFO - codeparrot_training - Step 37221: {'lr': 0.00043332466120018685, 'samples': 19057664, 'steps': 37221, 'loss/train': 1.3197872638702393} +03/05/2022 09:13:12 - INFO - codeparrot_training - Step 37222: {'lr': 0.000433321053067939, 'samples': 19058176, 'steps': 37222, 'loss/train': 1.0913422107696533} +03/05/2022 09:13:15 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/05/2022 09:13:17 - INFO - codeparrot_training - Step 37223: {'lr': 0.00043331744485308954, 'samples': 19058688, 'steps': 37223, 'loss/train': 2.7695372104644775} +03/05/2022 09:13:20 - INFO - codeparrot_training - Step 37224: {'lr': 0.00043331383655564003, 'samples': 19059200, 'steps': 37224, 'loss/train': 1.3819255828857422} +03/05/2022 09:13:23 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/05/2022 09:13:26 - INFO - codeparrot_training - Step 37225: {'lr': 0.0004333102281755922, 'samples': 19059712, 'steps': 37225, 'loss/train': 1.5102075338363647} +03/05/2022 09:13:29 - INFO - codeparrot_training - Step 37226: {'lr': 0.0004333066197129475, 'samples': 19060224, 'steps': 37226, 'loss/train': 1.6563533544540405} +03/05/2022 09:13:32 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 09:13:34 - INFO - codeparrot_training - Step 37227: {'lr': 0.00043330301116770777, 'samples': 19060736, 'steps': 37227, 'loss/train': 1.7835414409637451} +03/05/2022 09:13:37 - INFO - codeparrot_training - Step 37228: {'lr': 0.0004332994025398745, 'samples': 19061248, 'steps': 37228, 'loss/train': 0.7525253295898438} +03/05/2022 09:13:40 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/05/2022 09:13:43 - INFO - codeparrot_training - Step 37229: {'lr': 0.0004332957938294493, 'samples': 19061760, 'steps': 37229, 'loss/train': 1.8023426532745361} +03/05/2022 09:13:46 - INFO - codeparrot_training - Step 37230: {'lr': 0.0004332921850364339, 'samples': 19062272, 'steps': 37230, 'loss/train': 0.955630362033844} +03/05/2022 09:13:48 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 09:13:51 - INFO - codeparrot_training - Step 37231: {'lr': 0.00043328857616082986, 'samples': 19062784, 'steps': 37231, 'loss/train': 1.55934739112854} +03/05/2022 09:13:54 - INFO - codeparrot_training - Step 37232: {'lr': 0.0004332849672026388, 'samples': 19063296, 'steps': 37232, 'loss/train': 1.3324642181396484} +03/05/2022 09:13:57 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/05/2022 09:14:00 - INFO - codeparrot_training - Step 37233: {'lr': 0.0004332813581618624, 'samples': 19063808, 'steps': 37233, 'loss/train': 1.6999443769454956} +03/05/2022 09:14:03 - INFO - codeparrot_training - Step 37234: {'lr': 0.00043327774903850226, 'samples': 19064320, 'steps': 37234, 'loss/train': 1.1778486967086792} +03/05/2022 09:14:05 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/05/2022 09:14:08 - INFO - codeparrot_training - Step 37235: {'lr': 0.0004332741398325599, 'samples': 19064832, 'steps': 37235, 'loss/train': 1.6891238689422607} +03/05/2022 09:14:11 - INFO - codeparrot_training - Step 37236: {'lr': 0.00043327053054403707, 'samples': 19065344, 'steps': 37236, 'loss/train': 1.519893765449524} +03/05/2022 09:14:14 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 09:14:16 - INFO - codeparrot_training - Step 37237: {'lr': 0.0004332669211729354, 'samples': 19065856, 'steps': 37237, 'loss/train': 1.3129029273986816} +03/05/2022 09:14:20 - INFO - codeparrot_training - Step 37238: {'lr': 0.00043326331171925656, 'samples': 19066368, 'steps': 37238, 'loss/train': 1.398112416267395} +03/05/2022 09:14:22 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/05/2022 09:14:25 - INFO - codeparrot_training - Step 37239: {'lr': 0.000433259702183002, 'samples': 19066880, 'steps': 37239, 'loss/train': 1.9617180824279785} +03/05/2022 09:14:28 - INFO - codeparrot_training - Step 37240: {'lr': 0.0004332560925641734, 'samples': 19067392, 'steps': 37240, 'loss/train': 3.2054178714752197} +03/05/2022 09:14:30 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/05/2022 09:14:33 - INFO - codeparrot_training - Step 37241: {'lr': 0.0004332524828627725, 'samples': 19067904, 'steps': 37241, 'loss/train': 1.6829006671905518} +03/05/2022 09:14:37 - INFO - codeparrot_training - Step 37242: {'lr': 0.0004332488730788009, 'samples': 19068416, 'steps': 37242, 'loss/train': 1.6495716571807861} +03/05/2022 09:14:39 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/05/2022 09:14:42 - INFO - codeparrot_training - Step 37243: {'lr': 0.0004332452632122601, 'samples': 19068928, 'steps': 37243, 'loss/train': 1.2926064729690552} +03/05/2022 09:14:45 - INFO - codeparrot_training - Step 37244: {'lr': 0.0004332416532631519, 'samples': 19069440, 'steps': 37244, 'loss/train': 1.1981189250946045} +03/05/2022 09:14:47 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 09:14:50 - INFO - codeparrot_training - Step 37245: {'lr': 0.00043323804323147777, 'samples': 19069952, 'steps': 37245, 'loss/train': 1.364798665046692} +03/05/2022 09:14:53 - INFO - codeparrot_training - Step 37246: {'lr': 0.0004332344331172394, 'samples': 19070464, 'steps': 37246, 'loss/train': 1.69232976436615} +03/05/2022 09:14:56 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 09:14:59 - INFO - codeparrot_training - Step 37247: {'lr': 0.0004332308229204385, 'samples': 19070976, 'steps': 37247, 'loss/train': 1.5967638492584229} +03/05/2022 09:15:02 - INFO - codeparrot_training - Step 37248: {'lr': 0.00043322721264107657, 'samples': 19071488, 'steps': 37248, 'loss/train': 1.9966073036193848} +03/05/2022 09:15:04 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/05/2022 09:15:07 - INFO - codeparrot_training - Step 37249: {'lr': 0.00043322360227915526, 'samples': 19072000, 'steps': 37249, 'loss/train': 1.4784129858016968} +03/05/2022 09:15:10 - INFO - codeparrot_training - Step 37250: {'lr': 0.0004332199918346763, 'samples': 19072512, 'steps': 37250, 'loss/train': 1.0774372816085815} +03/05/2022 09:15:13 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 09:15:16 - INFO - codeparrot_training - Step 37251: {'lr': 0.00043321638130764116, 'samples': 19073024, 'steps': 37251, 'loss/train': 2.132511854171753} +03/05/2022 09:15:19 - INFO - codeparrot_training - Step 37252: {'lr': 0.00043321277069805153, 'samples': 19073536, 'steps': 37252, 'loss/train': 2.3566572666168213} +03/05/2022 09:15:21 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/05/2022 09:15:24 - INFO - codeparrot_training - Step 37253: {'lr': 0.0004332091600059091, 'samples': 19074048, 'steps': 37253, 'loss/train': 1.4322679042816162} +03/05/2022 09:15:27 - INFO - codeparrot_training - Step 37254: {'lr': 0.00043320554923121545, 'samples': 19074560, 'steps': 37254, 'loss/train': 1.9493342638015747} +03/05/2022 09:15:29 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/05/2022 09:15:33 - INFO - codeparrot_training - Step 37255: {'lr': 0.0004332019383739722, 'samples': 19075072, 'steps': 37255, 'loss/train': 1.4963582754135132} +03/05/2022 09:15:36 - INFO - codeparrot_training - Step 37256: {'lr': 0.000433198327434181, 'samples': 19075584, 'steps': 37256, 'loss/train': 0.601830005645752} +03/05/2022 09:15:38 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 09:15:41 - INFO - codeparrot_training - Step 37257: {'lr': 0.0004331947164118434, 'samples': 19076096, 'steps': 37257, 'loss/train': 1.796028971672058} +03/05/2022 09:15:44 - INFO - codeparrot_training - Step 37258: {'lr': 0.00043319110530696116, 'samples': 19076608, 'steps': 37258, 'loss/train': 1.6336404085159302} +03/05/2022 09:15:46 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/05/2022 09:15:50 - INFO - codeparrot_training - Step 37259: {'lr': 0.00043318749411953584, 'samples': 19077120, 'steps': 37259, 'loss/train': 0.884859561920166} +03/05/2022 09:15:53 - INFO - codeparrot_training - Step 37260: {'lr': 0.000433183882849569, 'samples': 19077632, 'steps': 37260, 'loss/train': 0.9211164712905884} +03/05/2022 09:15:56 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/05/2022 09:15:59 - INFO - codeparrot_training - Step 37261: {'lr': 0.0004331802714970624, 'samples': 19078144, 'steps': 37261, 'loss/train': 1.486607313156128} +03/05/2022 09:16:02 - INFO - codeparrot_training - Step 37262: {'lr': 0.0004331766600620175, 'samples': 19078656, 'steps': 37262, 'loss/train': 1.5779222249984741} +03/05/2022 09:16:04 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 09:16:07 - INFO - codeparrot_training - Step 37263: {'lr': 0.00043317304854443607, 'samples': 19079168, 'steps': 37263, 'loss/train': 1.4708834886550903} +03/05/2022 09:16:10 - INFO - codeparrot_training - Step 37264: {'lr': 0.0004331694369443197, 'samples': 19079680, 'steps': 37264, 'loss/train': 1.3905649185180664} +03/05/2022 09:16:12 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/05/2022 09:16:16 - INFO - codeparrot_training - Step 37265: {'lr': 0.00043316582526167004, 'samples': 19080192, 'steps': 37265, 'loss/train': 2.2101755142211914} +03/05/2022 09:16:19 - INFO - codeparrot_training - Step 37266: {'lr': 0.0004331622134964887, 'samples': 19080704, 'steps': 37266, 'loss/train': 1.789198637008667} +03/05/2022 09:16:21 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 09:16:24 - INFO - codeparrot_training - Step 37267: {'lr': 0.0004331586016487772, 'samples': 19081216, 'steps': 37267, 'loss/train': 1.5984159708023071} +03/05/2022 09:16:27 - INFO - codeparrot_training - Step 37268: {'lr': 0.00043315498971853726, 'samples': 19081728, 'steps': 37268, 'loss/train': 1.887995719909668} +03/05/2022 09:16:29 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 09:16:33 - INFO - codeparrot_training - Step 37269: {'lr': 0.0004331513777057706, 'samples': 19082240, 'steps': 37269, 'loss/train': 2.2215378284454346} +03/05/2022 09:16:36 - INFO - codeparrot_training - Step 37270: {'lr': 0.00043314776561047865, 'samples': 19082752, 'steps': 37270, 'loss/train': 2.156205177307129} +03/05/2022 09:16:40 - INFO - codeparrot_training - Step 37271: {'lr': 0.0004331441534326632, 'samples': 19083264, 'steps': 37271, 'loss/train': 2.110055685043335} +03/05/2022 09:16:41 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/05/2022 09:16:45 - INFO - codeparrot_training - Step 37272: {'lr': 0.0004331405411723258, 'samples': 19083776, 'steps': 37272, 'loss/train': 1.8781111240386963} +03/05/2022 09:16:48 - INFO - codeparrot_training - Step 37273: {'lr': 0.0004331369288294681, 'samples': 19084288, 'steps': 37273, 'loss/train': 2.123711585998535} +03/05/2022 09:16:50 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/05/2022 09:16:53 - INFO - codeparrot_training - Step 37274: {'lr': 0.0004331333164040918, 'samples': 19084800, 'steps': 37274, 'loss/train': 1.5526198148727417} +03/05/2022 09:16:56 - INFO - codeparrot_training - Step 37275: {'lr': 0.0004331297038961984, 'samples': 19085312, 'steps': 37275, 'loss/train': 2.1994760036468506} +03/05/2022 09:16:58 - INFO - codeparrot_training - Skipping example with length 960 (seq_length=1024) +03/05/2022 09:17:02 - INFO - codeparrot_training - Step 37276: {'lr': 0.00043312609130578963, 'samples': 19085824, 'steps': 37276, 'loss/train': 1.8799238204956055} +03/05/2022 09:17:05 - INFO - codeparrot_training - Step 37277: {'lr': 0.000433122478632867, 'samples': 19086336, 'steps': 37277, 'loss/train': 0.898362398147583} +03/05/2022 09:17:08 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 09:17:10 - INFO - codeparrot_training - Step 37278: {'lr': 0.0004331188658774322, 'samples': 19086848, 'steps': 37278, 'loss/train': 0.8972975611686707} +03/05/2022 09:17:14 - INFO - codeparrot_training - Step 37279: {'lr': 0.00043311525303948685, 'samples': 19087360, 'steps': 37279, 'loss/train': 0.7990037798881531} +03/05/2022 09:17:16 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/05/2022 09:17:19 - INFO - codeparrot_training - Step 37280: {'lr': 0.0004331116401190327, 'samples': 19087872, 'steps': 37280, 'loss/train': 1.9313828945159912} +03/05/2022 09:17:22 - INFO - codeparrot_training - Step 37281: {'lr': 0.0004331080271160712, 'samples': 19088384, 'steps': 37281, 'loss/train': 3.274109363555908} +03/05/2022 09:17:25 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 09:17:27 - INFO - codeparrot_training - Step 37282: {'lr': 0.00043310441403060404, 'samples': 19088896, 'steps': 37282, 'loss/train': 3.413198947906494} +03/05/2022 09:17:31 - INFO - codeparrot_training - Step 37283: {'lr': 0.00043310080086263284, 'samples': 19089408, 'steps': 37283, 'loss/train': 0.8329365253448486} +03/05/2022 09:17:33 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/05/2022 09:17:36 - INFO - codeparrot_training - Step 37284: {'lr': 0.0004330971876121593, 'samples': 19089920, 'steps': 37284, 'loss/train': 1.246399164199829} +03/05/2022 09:17:39 - INFO - codeparrot_training - Step 37285: {'lr': 0.0004330935742791849, 'samples': 19090432, 'steps': 37285, 'loss/train': 1.1021398305892944} +03/05/2022 09:17:42 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/05/2022 09:17:45 - INFO - codeparrot_training - Step 37286: {'lr': 0.00043308996086371146, 'samples': 19090944, 'steps': 37286, 'loss/train': 1.8378922939300537} +03/05/2022 09:17:48 - INFO - codeparrot_training - Step 37287: {'lr': 0.0004330863473657405, 'samples': 19091456, 'steps': 37287, 'loss/train': 1.7196879386901855} +03/05/2022 09:17:50 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/05/2022 09:17:53 - INFO - codeparrot_training - Step 37288: {'lr': 0.00043308273378527364, 'samples': 19091968, 'steps': 37288, 'loss/train': 1.3377351760864258} +03/05/2022 09:17:56 - INFO - codeparrot_training - Step 37289: {'lr': 0.00043307912012231255, 'samples': 19092480, 'steps': 37289, 'loss/train': 1.8187739849090576} +03/05/2022 09:17:58 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/05/2022 09:18:02 - INFO - codeparrot_training - Step 37290: {'lr': 0.0004330755063768588, 'samples': 19092992, 'steps': 37290, 'loss/train': 1.7614995241165161} +03/05/2022 09:18:05 - INFO - codeparrot_training - Step 37291: {'lr': 0.000433071892548914, 'samples': 19093504, 'steps': 37291, 'loss/train': 2.087303876876831} +03/05/2022 09:18:07 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/05/2022 09:18:10 - INFO - codeparrot_training - Step 37292: {'lr': 0.00043306827863847985, 'samples': 19094016, 'steps': 37292, 'loss/train': 1.9073433876037598} +03/05/2022 09:18:13 - INFO - codeparrot_training - Step 37293: {'lr': 0.00043306466464555803, 'samples': 19094528, 'steps': 37293, 'loss/train': 1.662179708480835} +03/05/2022 09:18:16 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/05/2022 09:18:19 - INFO - codeparrot_training - Step 37294: {'lr': 0.0004330610505701501, 'samples': 19095040, 'steps': 37294, 'loss/train': 1.7765673398971558} +03/05/2022 09:18:22 - INFO - codeparrot_training - Step 37295: {'lr': 0.00043305743641225766, 'samples': 19095552, 'steps': 37295, 'loss/train': 1.4252513647079468} +03/05/2022 09:18:25 - INFO - codeparrot_training - Step 37296: {'lr': 0.00043305382217188225, 'samples': 19096064, 'steps': 37296, 'loss/train': 1.893965244293213} +03/05/2022 09:18:25 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 09:18:30 - INFO - codeparrot_training - Step 37297: {'lr': 0.0004330502078490258, 'samples': 19096576, 'steps': 37297, 'loss/train': 1.280961513519287} +03/05/2022 09:18:33 - INFO - codeparrot_training - Step 37298: {'lr': 0.0004330465934436896, 'samples': 19097088, 'steps': 37298, 'loss/train': 1.750510811805725} +03/05/2022 09:18:34 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/05/2022 09:18:39 - INFO - codeparrot_training - Step 37299: {'lr': 0.00043304297895587553, 'samples': 19097600, 'steps': 37299, 'loss/train': 1.2805050611495972} +03/05/2022 09:18:42 - INFO - codeparrot_training - Step 37300: {'lr': 0.0004330393643855851, 'samples': 19098112, 'steps': 37300, 'loss/train': 1.9552524089813232} +03/05/2022 09:18:42 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/05/2022 09:18:48 - INFO - codeparrot_training - Step 37301: {'lr': 0.0004330357497328199, 'samples': 19098624, 'steps': 37301, 'loss/train': 1.4127495288848877} +03/05/2022 09:18:51 - INFO - codeparrot_training - Step 37302: {'lr': 0.00043303213499758166, 'samples': 19099136, 'steps': 37302, 'loss/train': 1.7767469882965088} +03/05/2022 09:18:51 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 09:18:56 - INFO - codeparrot_training - Step 37303: {'lr': 0.00043302852017987196, 'samples': 19099648, 'steps': 37303, 'loss/train': 2.5363333225250244} +03/05/2022 09:18:59 - INFO - codeparrot_training - Step 37304: {'lr': 0.0004330249052796924, 'samples': 19100160, 'steps': 37304, 'loss/train': 1.5441447496414185} +03/05/2022 09:19:00 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/05/2022 09:19:04 - INFO - codeparrot_training - Step 37305: {'lr': 0.0004330212902970447, 'samples': 19100672, 'steps': 37305, 'loss/train': 1.6807595491409302} +03/05/2022 09:19:07 - INFO - codeparrot_training - Step 37306: {'lr': 0.0004330176752319304, 'samples': 19101184, 'steps': 37306, 'loss/train': 1.852253794670105} +03/05/2022 09:19:08 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 09:19:13 - INFO - codeparrot_training - Step 37307: {'lr': 0.0004330140600843512, 'samples': 19101696, 'steps': 37307, 'loss/train': 1.1957494020462036} +03/05/2022 09:19:16 - INFO - codeparrot_training - Step 37308: {'lr': 0.0004330104448543086, 'samples': 19102208, 'steps': 37308, 'loss/train': 1.0698621273040771} +03/05/2022 09:19:17 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/05/2022 09:19:21 - INFO - codeparrot_training - Step 37309: {'lr': 0.0004330068295418044, 'samples': 19102720, 'steps': 37309, 'loss/train': 1.8032397031784058} +03/05/2022 09:19:25 - INFO - codeparrot_training - Step 37310: {'lr': 0.0004330032141468401, 'samples': 19103232, 'steps': 37310, 'loss/train': 1.57412588596344} +03/05/2022 09:19:26 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 09:19:30 - INFO - codeparrot_training - Step 37311: {'lr': 0.0004329995986694174, 'samples': 19103744, 'steps': 37311, 'loss/train': 1.9955803155899048} +03/05/2022 09:19:33 - INFO - codeparrot_training - Step 37312: {'lr': 0.00043299598310953793, 'samples': 19104256, 'steps': 37312, 'loss/train': 1.413143515586853} +03/05/2022 09:19:34 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/05/2022 09:19:38 - INFO - codeparrot_training - Step 37313: {'lr': 0.0004329923674672032, 'samples': 19104768, 'steps': 37313, 'loss/train': 3.051741600036621} +03/05/2022 09:19:42 - INFO - codeparrot_training - Step 37314: {'lr': 0.00043298875174241504, 'samples': 19105280, 'steps': 37314, 'loss/train': 1.1036561727523804} +03/05/2022 09:19:43 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/05/2022 09:19:47 - INFO - codeparrot_training - Step 37315: {'lr': 0.00043298513593517483, 'samples': 19105792, 'steps': 37315, 'loss/train': 1.422577977180481} +03/05/2022 09:19:50 - INFO - codeparrot_training - Step 37316: {'lr': 0.0004329815200454845, 'samples': 19106304, 'steps': 37316, 'loss/train': 1.7480827569961548} +03/05/2022 09:19:51 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 09:19:55 - INFO - codeparrot_training - Step 37317: {'lr': 0.00043297790407334545, 'samples': 19106816, 'steps': 37317, 'loss/train': 1.4802038669586182} +03/05/2022 09:19:58 - INFO - codeparrot_training - Step 37318: {'lr': 0.0004329742880187594, 'samples': 19107328, 'steps': 37318, 'loss/train': 1.5103607177734375} +03/05/2022 09:20:00 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 09:20:04 - INFO - codeparrot_training - Step 37319: {'lr': 0.0004329706718817279, 'samples': 19107840, 'steps': 37319, 'loss/train': 1.5711520910263062} +03/05/2022 09:20:07 - INFO - codeparrot_training - Step 37320: {'lr': 0.00043296705566225267, 'samples': 19108352, 'steps': 37320, 'loss/train': 1.523724913597107} +03/05/2022 09:20:08 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 09:20:12 - INFO - codeparrot_training - Step 37321: {'lr': 0.00043296343936033535, 'samples': 19108864, 'steps': 37321, 'loss/train': 1.620537281036377} +03/05/2022 09:20:15 - INFO - codeparrot_training - Step 37322: {'lr': 0.0004329598229759775, 'samples': 19109376, 'steps': 37322, 'loss/train': 2.179577589035034} +03/05/2022 09:20:16 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/05/2022 09:20:21 - INFO - codeparrot_training - Step 37323: {'lr': 0.00043295620650918076, 'samples': 19109888, 'steps': 37323, 'loss/train': 1.6708800792694092} +03/05/2022 09:20:24 - INFO - codeparrot_training - Step 37324: {'lr': 0.0004329525899599468, 'samples': 19110400, 'steps': 37324, 'loss/train': 1.5076006650924683} +03/05/2022 09:20:25 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 09:20:29 - INFO - codeparrot_training - Step 37325: {'lr': 0.0004329489733282772, 'samples': 19110912, 'steps': 37325, 'loss/train': 1.8476492166519165} +03/05/2022 09:20:32 - INFO - codeparrot_training - Step 37326: {'lr': 0.0004329453566141737, 'samples': 19111424, 'steps': 37326, 'loss/train': 1.854279637336731} +03/05/2022 09:20:33 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/05/2022 09:20:38 - INFO - codeparrot_training - Step 37327: {'lr': 0.00043294173981763776, 'samples': 19111936, 'steps': 37327, 'loss/train': 1.4228854179382324} +03/05/2022 09:20:41 - INFO - codeparrot_training - Step 37328: {'lr': 0.00043293812293867113, 'samples': 19112448, 'steps': 37328, 'loss/train': 1.4969847202301025} +03/05/2022 09:20:41 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 09:20:46 - INFO - codeparrot_training - Step 37329: {'lr': 0.0004329345059772754, 'samples': 19112960, 'steps': 37329, 'loss/train': 1.905001163482666} +03/05/2022 09:20:49 - INFO - codeparrot_training - Step 37330: {'lr': 0.0004329308889334522, 'samples': 19113472, 'steps': 37330, 'loss/train': 1.6224944591522217} +03/05/2022 09:20:50 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 09:20:54 - INFO - codeparrot_training - Step 37331: {'lr': 0.00043292727180720315, 'samples': 19113984, 'steps': 37331, 'loss/train': 1.2870073318481445} +03/05/2022 09:20:57 - INFO - codeparrot_training - Step 37332: {'lr': 0.0004329236545985299, 'samples': 19114496, 'steps': 37332, 'loss/train': 1.8432685136795044} +03/05/2022 09:20:58 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/05/2022 09:21:03 - INFO - codeparrot_training - Step 37333: {'lr': 0.000432920037307434, 'samples': 19115008, 'steps': 37333, 'loss/train': 1.5295342206954956} +03/05/2022 09:21:06 - INFO - codeparrot_training - Step 37334: {'lr': 0.00043291641993391727, 'samples': 19115520, 'steps': 37334, 'loss/train': 1.4443494081497192} +03/05/2022 09:21:07 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 09:21:11 - INFO - codeparrot_training - Step 37335: {'lr': 0.0004329128024779812, 'samples': 19116032, 'steps': 37335, 'loss/train': 1.9981673955917358} +03/05/2022 09:21:14 - INFO - codeparrot_training - Step 37336: {'lr': 0.0004329091849396274, 'samples': 19116544, 'steps': 37336, 'loss/train': 1.437608003616333} +03/05/2022 09:21:15 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/05/2022 09:21:20 - INFO - codeparrot_training - Step 37337: {'lr': 0.00043290556731885756, 'samples': 19117056, 'steps': 37337, 'loss/train': 1.7902567386627197} +03/05/2022 09:21:23 - INFO - codeparrot_training - Step 37338: {'lr': 0.0004329019496156733, 'samples': 19117568, 'steps': 37338, 'loss/train': 1.0266045331954956} +03/05/2022 09:21:24 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 09:21:28 - INFO - codeparrot_training - Step 37339: {'lr': 0.0004328983318300763, 'samples': 19118080, 'steps': 37339, 'loss/train': 1.2862045764923096} +03/05/2022 09:21:31 - INFO - codeparrot_training - Step 37340: {'lr': 0.00043289471396206803, 'samples': 19118592, 'steps': 37340, 'loss/train': 1.4169723987579346} +03/05/2022 09:21:32 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 09:21:37 - INFO - codeparrot_training - Step 37341: {'lr': 0.0004328910960116503, 'samples': 19119104, 'steps': 37341, 'loss/train': 1.9036922454833984} +03/05/2022 09:21:40 - INFO - codeparrot_training - Step 37342: {'lr': 0.00043288747797882467, 'samples': 19119616, 'steps': 37342, 'loss/train': 1.794769525527954} +03/05/2022 09:21:40 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/05/2022 09:21:45 - INFO - codeparrot_training - Step 37343: {'lr': 0.00043288385986359266, 'samples': 19120128, 'steps': 37343, 'loss/train': 1.5126090049743652} +03/05/2022 09:21:48 - INFO - codeparrot_training - Step 37344: {'lr': 0.00043288024166595614, 'samples': 19120640, 'steps': 37344, 'loss/train': 1.6213328838348389} +03/05/2022 09:21:49 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/05/2022 09:21:53 - INFO - codeparrot_training - Step 37345: {'lr': 0.00043287662338591657, 'samples': 19121152, 'steps': 37345, 'loss/train': 0.5524718761444092} +03/05/2022 09:21:57 - INFO - codeparrot_training - Step 37346: {'lr': 0.0004328730050234756, 'samples': 19121664, 'steps': 37346, 'loss/train': 1.0178579092025757} +03/05/2022 09:21:57 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 09:22:02 - INFO - codeparrot_training - Step 37347: {'lr': 0.00043286938657863483, 'samples': 19122176, 'steps': 37347, 'loss/train': 1.230108380317688} +03/05/2022 09:22:05 - INFO - codeparrot_training - Step 37348: {'lr': 0.00043286576805139597, 'samples': 19122688, 'steps': 37348, 'loss/train': 2.077693462371826} +03/05/2022 09:22:05 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/05/2022 09:22:11 - INFO - codeparrot_training - Step 37349: {'lr': 0.0004328621494417606, 'samples': 19123200, 'steps': 37349, 'loss/train': 1.875444769859314} +03/05/2022 09:22:14 - INFO - codeparrot_training - Step 37350: {'lr': 0.0004328585307497304, 'samples': 19123712, 'steps': 37350, 'loss/train': 2.4255075454711914} +03/05/2022 09:22:17 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 09:22:19 - INFO - codeparrot_training - Step 37351: {'lr': 0.00043285491197530694, 'samples': 19124224, 'steps': 37351, 'loss/train': 1.4060009717941284} +03/05/2022 09:22:22 - INFO - codeparrot_training - Step 37352: {'lr': 0.00043285129311849193, 'samples': 19124736, 'steps': 37352, 'loss/train': 1.169861912727356} +03/05/2022 09:22:25 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 09:22:28 - INFO - codeparrot_training - Step 37353: {'lr': 0.0004328476741792869, 'samples': 19125248, 'steps': 37353, 'loss/train': 1.882831335067749} +03/05/2022 09:22:31 - INFO - codeparrot_training - Step 37354: {'lr': 0.00043284405515769356, 'samples': 19125760, 'steps': 37354, 'loss/train': 0.7923739552497864} +03/05/2022 09:22:33 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/05/2022 09:22:36 - INFO - codeparrot_training - Step 37355: {'lr': 0.00043284043605371346, 'samples': 19126272, 'steps': 37355, 'loss/train': 0.6531121134757996} +03/05/2022 09:22:39 - INFO - codeparrot_training - Step 37356: {'lr': 0.0004328368168673483, 'samples': 19126784, 'steps': 37356, 'loss/train': 2.312175750732422} +03/05/2022 09:22:43 - INFO - codeparrot_training - Step 37357: {'lr': 0.00043283319759859974, 'samples': 19127296, 'steps': 37357, 'loss/train': 0.9761717915534973} +03/05/2022 09:22:43 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 09:22:48 - INFO - codeparrot_training - Step 37358: {'lr': 0.0004328295782474693, 'samples': 19127808, 'steps': 37358, 'loss/train': 1.2190566062927246} +03/05/2022 09:22:51 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 09:22:53 - INFO - codeparrot_training - Step 37359: {'lr': 0.0004328259588139587, 'samples': 19128320, 'steps': 37359, 'loss/train': 1.9386810064315796} +03/05/2022 09:22:56 - INFO - codeparrot_training - Step 37360: {'lr': 0.0004328223392980696, 'samples': 19128832, 'steps': 37360, 'loss/train': 1.3403894901275635} +03/05/2022 09:22:59 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/05/2022 09:23:02 - INFO - codeparrot_training - Step 37361: {'lr': 0.00043281871969980346, 'samples': 19129344, 'steps': 37361, 'loss/train': 1.8786331415176392} +03/05/2022 09:23:05 - INFO - codeparrot_training - Step 37362: {'lr': 0.00043281510001916214, 'samples': 19129856, 'steps': 37362, 'loss/train': 2.19429874420166} +03/05/2022 09:23:08 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/05/2022 09:23:10 - INFO - codeparrot_training - Step 37363: {'lr': 0.0004328114802561471, 'samples': 19130368, 'steps': 37363, 'loss/train': 1.758787751197815} +03/05/2022 09:23:13 - INFO - codeparrot_training - Step 37364: {'lr': 0.00043280786041076006, 'samples': 19130880, 'steps': 37364, 'loss/train': 0.8903200626373291} +03/05/2022 09:23:16 - INFO - codeparrot_training - Step 37365: {'lr': 0.0004328042404830026, 'samples': 19131392, 'steps': 37365, 'loss/train': 1.2765926122665405} +03/05/2022 09:23:16 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 09:23:22 - INFO - codeparrot_training - Step 37366: {'lr': 0.0004328006204728763, 'samples': 19131904, 'steps': 37366, 'loss/train': 1.6903386116027832} +03/05/2022 09:23:25 - INFO - codeparrot_training - Step 37367: {'lr': 0.00043279700038038296, 'samples': 19132416, 'steps': 37367, 'loss/train': 1.1962471008300781} +03/05/2022 09:23:25 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/05/2022 09:23:31 - INFO - codeparrot_training - Step 37368: {'lr': 0.0004327933802055241, 'samples': 19132928, 'steps': 37368, 'loss/train': 1.5294065475463867} +03/05/2022 09:23:34 - INFO - codeparrot_training - Step 37369: {'lr': 0.0004327897599483013, 'samples': 19133440, 'steps': 37369, 'loss/train': 1.8739827871322632} +03/05/2022 09:23:35 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/05/2022 09:23:39 - INFO - codeparrot_training - Step 37370: {'lr': 0.00043278613960871624, 'samples': 19133952, 'steps': 37370, 'loss/train': 1.888419508934021} +03/05/2022 09:23:42 - INFO - codeparrot_training - Step 37371: {'lr': 0.00043278251918677066, 'samples': 19134464, 'steps': 37371, 'loss/train': 2.161224842071533} +03/05/2022 09:23:44 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 09:23:47 - INFO - codeparrot_training - Step 37372: {'lr': 0.00043277889868246605, 'samples': 19134976, 'steps': 37372, 'loss/train': 1.6573903560638428} +03/05/2022 09:23:51 - INFO - codeparrot_training - Step 37373: {'lr': 0.0004327752780958041, 'samples': 19135488, 'steps': 37373, 'loss/train': 2.260578155517578} +03/05/2022 09:23:52 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/05/2022 09:23:56 - INFO - codeparrot_training - Step 37374: {'lr': 0.0004327716574267864, 'samples': 19136000, 'steps': 37374, 'loss/train': 2.3852336406707764} +03/05/2022 09:23:59 - INFO - codeparrot_training - Step 37375: {'lr': 0.00043276803667541465, 'samples': 19136512, 'steps': 37375, 'loss/train': 1.8652197122573853} +03/05/2022 09:24:00 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/05/2022 09:24:04 - INFO - codeparrot_training - Step 37376: {'lr': 0.0004327644158416905, 'samples': 19137024, 'steps': 37376, 'loss/train': 2.2459471225738525} +03/05/2022 09:24:07 - INFO - codeparrot_training - Step 37377: {'lr': 0.0004327607949256154, 'samples': 19137536, 'steps': 37377, 'loss/train': 0.8888918161392212} +03/05/2022 09:24:09 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/05/2022 09:24:13 - INFO - codeparrot_training - Step 37378: {'lr': 0.00043275717392719115, 'samples': 19138048, 'steps': 37378, 'loss/train': 1.9757126569747925} +03/05/2022 09:24:16 - INFO - codeparrot_training - Step 37379: {'lr': 0.0004327535528464194, 'samples': 19138560, 'steps': 37379, 'loss/train': 1.7740944623947144} +03/05/2022 09:24:16 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/05/2022 09:24:22 - INFO - codeparrot_training - Step 37380: {'lr': 0.0004327499316833016, 'samples': 19139072, 'steps': 37380, 'loss/train': 2.1439859867095947} +03/05/2022 09:24:25 - INFO - codeparrot_training - Step 37381: {'lr': 0.0004327463104378395, 'samples': 19139584, 'steps': 37381, 'loss/train': 1.8235187530517578} +03/05/2022 09:24:28 - INFO - codeparrot_training - Step 37382: {'lr': 0.0004327426891100349, 'samples': 19140096, 'steps': 37382, 'loss/train': 2.2670650482177734} +03/05/2022 09:24:28 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/05/2022 09:24:33 - INFO - codeparrot_training - Step 37383: {'lr': 0.0004327390676998891, 'samples': 19140608, 'steps': 37383, 'loss/train': 2.193467617034912} +03/05/2022 09:24:37 - INFO - codeparrot_training - Step 37384: {'lr': 0.000432735446207404, 'samples': 19141120, 'steps': 37384, 'loss/train': 1.4015774726867676} +03/05/2022 09:24:37 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/05/2022 09:24:42 - INFO - codeparrot_training - Step 37385: {'lr': 0.0004327318246325811, 'samples': 19141632, 'steps': 37385, 'loss/train': 1.4087187051773071} +03/05/2022 09:24:45 - INFO - codeparrot_training - Step 37386: {'lr': 0.000432728202975422, 'samples': 19142144, 'steps': 37386, 'loss/train': 0.49444064497947693} +03/05/2022 09:24:45 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 09:24:50 - INFO - codeparrot_training - Step 37387: {'lr': 0.0004327245812359285, 'samples': 19142656, 'steps': 37387, 'loss/train': 2.367577075958252} +03/05/2022 09:24:53 - INFO - codeparrot_training - Step 37388: {'lr': 0.000432720959414102, 'samples': 19143168, 'steps': 37388, 'loss/train': 1.1622065305709839} +03/05/2022 09:24:54 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 09:24:59 - INFO - codeparrot_training - Step 37389: {'lr': 0.00043271733750994436, 'samples': 19143680, 'steps': 37389, 'loss/train': 1.7819451093673706} +03/05/2022 09:25:02 - INFO - codeparrot_training - Step 37390: {'lr': 0.00043271371552345704, 'samples': 19144192, 'steps': 37390, 'loss/train': 2.125990867614746} +03/05/2022 09:25:02 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/05/2022 09:25:07 - INFO - codeparrot_training - Step 37391: {'lr': 0.00043271009345464175, 'samples': 19144704, 'steps': 37391, 'loss/train': 0.4303770065307617} +03/05/2022 09:25:10 - INFO - codeparrot_training - Step 37392: {'lr': 0.0004327064713035002, 'samples': 19145216, 'steps': 37392, 'loss/train': 0.6421830654144287} +03/05/2022 09:25:11 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 09:25:15 - INFO - codeparrot_training - Step 37393: {'lr': 0.00043270284907003377, 'samples': 19145728, 'steps': 37393, 'loss/train': 1.5644527673721313} +03/05/2022 09:25:19 - INFO - codeparrot_training - Step 37394: {'lr': 0.0004326992267542443, 'samples': 19146240, 'steps': 37394, 'loss/train': 1.5239957571029663} +03/05/2022 09:25:19 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/05/2022 09:25:24 - INFO - codeparrot_training - Step 37395: {'lr': 0.0004326956043561335, 'samples': 19146752, 'steps': 37395, 'loss/train': 1.8742051124572754} +03/05/2022 09:25:27 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 09:25:29 - INFO - codeparrot_training - Step 37396: {'lr': 0.0004326919818757028, 'samples': 19147264, 'steps': 37396, 'loss/train': 1.6692909002304077} +03/05/2022 09:25:32 - INFO - codeparrot_training - Step 37397: {'lr': 0.00043268835931295393, 'samples': 19147776, 'steps': 37397, 'loss/train': 2.424363374710083} +03/05/2022 09:25:36 - INFO - codeparrot_training - Step 37398: {'lr': 0.00043268473666788844, 'samples': 19148288, 'steps': 37398, 'loss/train': 1.2113555669784546} +03/05/2022 09:25:36 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 09:25:41 - INFO - codeparrot_training - Step 37399: {'lr': 0.0004326811139405081, 'samples': 19148800, 'steps': 37399, 'loss/train': 1.6166456937789917} +03/05/2022 09:25:44 - INFO - codeparrot_training - Step 37400: {'lr': 0.0004326774911308145, 'samples': 19149312, 'steps': 37400, 'loss/train': 1.489588975906372} +03/05/2022 09:25:44 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 09:25:49 - INFO - codeparrot_training - Step 37401: {'lr': 0.00043267386823880904, 'samples': 19149824, 'steps': 37401, 'loss/train': 1.8628411293029785} +03/05/2022 09:25:52 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/05/2022 09:25:55 - INFO - codeparrot_training - Step 37402: {'lr': 0.00043267024526449374, 'samples': 19150336, 'steps': 37402, 'loss/train': 1.6013396978378296} +03/05/2022 09:25:58 - INFO - codeparrot_training - Step 37403: {'lr': 0.00043266662220787003, 'samples': 19150848, 'steps': 37403, 'loss/train': 1.6813979148864746} +03/05/2022 09:26:01 - INFO - codeparrot_training - Step 37404: {'lr': 0.0004326629990689395, 'samples': 19151360, 'steps': 37404, 'loss/train': 1.1357797384262085} +03/05/2022 09:26:01 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 09:26:06 - INFO - codeparrot_training - Step 37405: {'lr': 0.0004326593758477039, 'samples': 19151872, 'steps': 37405, 'loss/train': 2.0731494426727295} +03/05/2022 09:26:09 - INFO - codeparrot_training - Step 37406: {'lr': 0.0004326557525441648, 'samples': 19152384, 'steps': 37406, 'loss/train': 1.6206623315811157} +03/05/2022 09:26:09 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/05/2022 09:26:15 - INFO - codeparrot_training - Step 37407: {'lr': 0.00043265212915832374, 'samples': 19152896, 'steps': 37407, 'loss/train': 0.9375823736190796} +03/05/2022 09:26:18 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/05/2022 09:26:20 - INFO - codeparrot_training - Step 37408: {'lr': 0.00043264850569018254, 'samples': 19153408, 'steps': 37408, 'loss/train': 1.8092085123062134} +03/05/2022 09:26:23 - INFO - codeparrot_training - Step 37409: {'lr': 0.00043264488213974275, 'samples': 19153920, 'steps': 37409, 'loss/train': 1.822318434715271} +03/05/2022 09:26:26 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/05/2022 09:26:28 - INFO - codeparrot_training - Step 37410: {'lr': 0.000432641258507006, 'samples': 19154432, 'steps': 37410, 'loss/train': 2.377807855606079} +03/05/2022 09:26:32 - INFO - codeparrot_training - Step 37411: {'lr': 0.0004326376347919738, 'samples': 19154944, 'steps': 37411, 'loss/train': 1.078521728515625} +03/05/2022 09:26:34 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/05/2022 09:26:37 - INFO - codeparrot_training - Step 37412: {'lr': 0.00043263401099464805, 'samples': 19155456, 'steps': 37412, 'loss/train': 1.5230066776275635} +03/05/2022 09:26:40 - INFO - codeparrot_training - Step 37413: {'lr': 0.00043263038711503017, 'samples': 19155968, 'steps': 37413, 'loss/train': 1.4974355697631836} +03/05/2022 09:26:43 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/05/2022 09:26:45 - INFO - codeparrot_training - Step 37414: {'lr': 0.00043262676315312183, 'samples': 19156480, 'steps': 37414, 'loss/train': 1.9302799701690674} +03/05/2022 09:26:49 - INFO - codeparrot_training - Step 37415: {'lr': 0.0004326231391089247, 'samples': 19156992, 'steps': 37415, 'loss/train': 0.581866979598999} +03/05/2022 09:26:52 - INFO - codeparrot_training - Step 37416: {'lr': 0.00043261951498244045, 'samples': 19157504, 'steps': 37416, 'loss/train': 2.203850030899048} +03/05/2022 09:26:52 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/05/2022 09:26:57 - INFO - codeparrot_training - Step 37417: {'lr': 0.0004326158907736706, 'samples': 19158016, 'steps': 37417, 'loss/train': 1.3513565063476562} +03/05/2022 09:27:00 - INFO - codeparrot_training - Step 37418: {'lr': 0.00043261226648261687, 'samples': 19158528, 'steps': 37418, 'loss/train': 1.1556529998779297} +03/05/2022 09:27:01 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 09:27:06 - INFO - codeparrot_training - Step 37419: {'lr': 0.0004326086421092809, 'samples': 19159040, 'steps': 37419, 'loss/train': 2.027801513671875} +03/05/2022 09:27:09 - INFO - codeparrot_training - Step 37420: {'lr': 0.00043260501765366425, 'samples': 19159552, 'steps': 37420, 'loss/train': 1.628307819366455} +03/05/2022 09:27:09 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/05/2022 09:27:14 - INFO - codeparrot_training - Step 37421: {'lr': 0.00043260139311576863, 'samples': 19160064, 'steps': 37421, 'loss/train': 1.086280345916748} +03/05/2022 09:27:17 - INFO - codeparrot_training - Step 37422: {'lr': 0.0004325977684955956, 'samples': 19160576, 'steps': 37422, 'loss/train': 2.0649948120117188} +03/05/2022 09:27:17 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 09:27:22 - INFO - codeparrot_training - Step 37423: {'lr': 0.0004325941437931469, 'samples': 19161088, 'steps': 37423, 'loss/train': 1.108130693435669} +03/05/2022 09:27:25 - INFO - codeparrot_training - Step 37424: {'lr': 0.0004325905190084241, 'samples': 19161600, 'steps': 37424, 'loss/train': 1.203427791595459} +03/05/2022 09:27:26 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/05/2022 09:27:31 - INFO - codeparrot_training - Step 37425: {'lr': 0.00043258689414142875, 'samples': 19162112, 'steps': 37425, 'loss/train': 2.662029504776001} +03/05/2022 09:27:34 - INFO - codeparrot_training - Step 37426: {'lr': 0.0004325832691921626, 'samples': 19162624, 'steps': 37426, 'loss/train': 3.108893632888794} +03/05/2022 09:27:34 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/05/2022 09:27:39 - INFO - codeparrot_training - Step 37427: {'lr': 0.00043257964416062723, 'samples': 19163136, 'steps': 37427, 'loss/train': 3.2600698471069336} +03/05/2022 09:27:43 - INFO - codeparrot_training - Step 37428: {'lr': 0.0004325760190468243, 'samples': 19163648, 'steps': 37428, 'loss/train': 2.409083843231201} +03/05/2022 09:27:43 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/05/2022 09:27:48 - INFO - codeparrot_training - Step 37429: {'lr': 0.0004325723938507555, 'samples': 19164160, 'steps': 37429, 'loss/train': 1.5136232376098633} +03/05/2022 09:27:51 - INFO - codeparrot_training - Step 37430: {'lr': 0.0004325687685724223, 'samples': 19164672, 'steps': 37430, 'loss/train': 1.3081026077270508} +03/05/2022 09:27:51 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 09:27:57 - INFO - codeparrot_training - Step 37431: {'lr': 0.0004325651432118265, 'samples': 19165184, 'steps': 37431, 'loss/train': 1.9557230472564697} +03/05/2022 09:28:00 - INFO - codeparrot_training - Step 37432: {'lr': 0.00043256151776896955, 'samples': 19165696, 'steps': 37432, 'loss/train': 1.540717601776123} +03/05/2022 09:28:00 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/05/2022 09:28:05 - INFO - codeparrot_training - Step 37433: {'lr': 0.0004325578922438533, 'samples': 19166208, 'steps': 37433, 'loss/train': 2.069904327392578} +03/05/2022 09:28:08 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/05/2022 09:28:10 - INFO - codeparrot_training - Step 37434: {'lr': 0.0004325542666364793, 'samples': 19166720, 'steps': 37434, 'loss/train': 1.5535534620285034} +03/05/2022 09:28:14 - INFO - codeparrot_training - Step 37435: {'lr': 0.00043255064094684917, 'samples': 19167232, 'steps': 37435, 'loss/train': 0.9031588435173035} +03/05/2022 09:28:16 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/05/2022 09:28:19 - INFO - codeparrot_training - Step 37436: {'lr': 0.0004325470151749644, 'samples': 19167744, 'steps': 37436, 'loss/train': 1.0802795886993408} +03/05/2022 09:28:22 - INFO - codeparrot_training - Step 37437: {'lr': 0.00043254338932082696, 'samples': 19168256, 'steps': 37437, 'loss/train': 0.5024791955947876} +03/05/2022 09:28:24 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 09:28:27 - INFO - codeparrot_training - Step 37438: {'lr': 0.00043253976338443814, 'samples': 19168768, 'steps': 37438, 'loss/train': 1.8215317726135254} +03/05/2022 09:28:30 - INFO - codeparrot_training - Step 37439: {'lr': 0.00043253613736579975, 'samples': 19169280, 'steps': 37439, 'loss/train': 1.8153842687606812} +03/05/2022 09:28:33 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/05/2022 09:28:36 - INFO - codeparrot_training - Step 37440: {'lr': 0.0004325325112649134, 'samples': 19169792, 'steps': 37440, 'loss/train': 1.5196151733398438} +03/05/2022 09:28:39 - INFO - codeparrot_training - Step 37441: {'lr': 0.00043252888508178066, 'samples': 19170304, 'steps': 37441, 'loss/train': 1.6854519844055176} +03/05/2022 09:28:41 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 09:28:44 - INFO - codeparrot_training - Step 37442: {'lr': 0.0004325252588164033, 'samples': 19170816, 'steps': 37442, 'loss/train': 1.7017935514450073} +03/05/2022 09:28:47 - INFO - codeparrot_training - Step 37443: {'lr': 0.00043252163246878286, 'samples': 19171328, 'steps': 37443, 'loss/train': 1.8897960186004639} +03/05/2022 09:28:50 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/05/2022 09:28:53 - INFO - codeparrot_training - Step 37444: {'lr': 0.000432518006038921, 'samples': 19171840, 'steps': 37444, 'loss/train': 1.2633941173553467} +03/05/2022 09:28:56 - INFO - codeparrot_training - Step 37445: {'lr': 0.00043251437952681926, 'samples': 19172352, 'steps': 37445, 'loss/train': 2.384800910949707} +03/05/2022 09:28:58 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/05/2022 09:29:01 - INFO - codeparrot_training - Step 37446: {'lr': 0.0004325107529324795, 'samples': 19172864, 'steps': 37446, 'loss/train': 1.2917897701263428} +03/05/2022 09:29:04 - INFO - codeparrot_training - Step 37447: {'lr': 0.0004325071262559031, 'samples': 19173376, 'steps': 37447, 'loss/train': 1.393094539642334} +03/05/2022 09:29:06 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/05/2022 09:29:10 - INFO - codeparrot_training - Step 37448: {'lr': 0.00043250349949709184, 'samples': 19173888, 'steps': 37448, 'loss/train': 0.7384893298149109} +03/05/2022 09:29:13 - INFO - codeparrot_training - Step 37449: {'lr': 0.0004324998726560473, 'samples': 19174400, 'steps': 37449, 'loss/train': 1.1492565870285034} +03/05/2022 09:29:15 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 09:29:18 - INFO - codeparrot_training - Step 37450: {'lr': 0.0004324962457327712, 'samples': 19174912, 'steps': 37450, 'loss/train': 2.8066797256469727} +03/05/2022 09:29:21 - INFO - codeparrot_training - Step 37451: {'lr': 0.00043249261872726504, 'samples': 19175424, 'steps': 37451, 'loss/train': 2.434494972229004} +03/05/2022 09:29:24 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/05/2022 09:29:26 - INFO - codeparrot_training - Step 37452: {'lr': 0.0004324889916395305, 'samples': 19175936, 'steps': 37452, 'loss/train': 1.1735317707061768} +03/05/2022 09:29:30 - INFO - codeparrot_training - Step 37453: {'lr': 0.0004324853644695693, 'samples': 19176448, 'steps': 37453, 'loss/train': 1.2539751529693604} +03/05/2022 09:29:32 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 09:29:35 - INFO - codeparrot_training - Step 37454: {'lr': 0.000432481737217383, 'samples': 19176960, 'steps': 37454, 'loss/train': 2.0843911170959473} +03/05/2022 09:29:38 - INFO - codeparrot_training - Step 37455: {'lr': 0.0004324781098829732, 'samples': 19177472, 'steps': 37455, 'loss/train': 1.8575918674468994} +03/05/2022 09:29:40 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 09:29:44 - INFO - codeparrot_training - Step 37456: {'lr': 0.0004324744824663417, 'samples': 19177984, 'steps': 37456, 'loss/train': 1.7395952939987183} +03/05/2022 09:29:47 - INFO - codeparrot_training - Step 37457: {'lr': 0.00043247085496748983, 'samples': 19178496, 'steps': 37457, 'loss/train': 2.257606267929077} +03/05/2022 09:29:49 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/05/2022 09:29:52 - INFO - codeparrot_training - Step 37458: {'lr': 0.0004324672273864195, 'samples': 19179008, 'steps': 37458, 'loss/train': 1.938042163848877} +03/05/2022 09:29:55 - INFO - codeparrot_training - Step 37459: {'lr': 0.00043246359972313233, 'samples': 19179520, 'steps': 37459, 'loss/train': 1.6560139656066895} +03/05/2022 09:29:58 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/05/2022 09:30:01 - INFO - codeparrot_training - Step 37460: {'lr': 0.0004324599719776298, 'samples': 19180032, 'steps': 37460, 'loss/train': 1.7937334775924683} +03/05/2022 09:30:04 - INFO - codeparrot_training - Step 37461: {'lr': 0.00043245634414991365, 'samples': 19180544, 'steps': 37461, 'loss/train': 1.9208468198776245} +03/05/2022 09:30:06 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 09:30:09 - INFO - codeparrot_training - Step 37462: {'lr': 0.0004324527162399854, 'samples': 19181056, 'steps': 37462, 'loss/train': 1.4209200143814087} +03/05/2022 09:30:12 - INFO - codeparrot_training - Step 37463: {'lr': 0.0004324490882478469, 'samples': 19181568, 'steps': 37463, 'loss/train': 1.706996202468872} +03/05/2022 09:30:14 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 09:30:17 - INFO - codeparrot_training - Step 37464: {'lr': 0.0004324454601734995, 'samples': 19182080, 'steps': 37464, 'loss/train': 1.937192440032959} +03/05/2022 09:30:21 - INFO - codeparrot_training - Step 37465: {'lr': 0.0004324418320169451, 'samples': 19182592, 'steps': 37465, 'loss/train': 1.8528603315353394} +03/05/2022 09:30:23 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/05/2022 09:30:26 - INFO - codeparrot_training - Step 37466: {'lr': 0.00043243820377818524, 'samples': 19183104, 'steps': 37466, 'loss/train': 1.6546454429626465} +03/05/2022 09:30:29 - INFO - codeparrot_training - Step 37467: {'lr': 0.0004324345754572215, 'samples': 19183616, 'steps': 37467, 'loss/train': 1.8930600881576538} +03/05/2022 09:30:31 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/05/2022 09:30:34 - INFO - codeparrot_training - Step 37468: {'lr': 0.00043243094705405554, 'samples': 19184128, 'steps': 37468, 'loss/train': 1.9466514587402344} +03/05/2022 09:30:37 - INFO - codeparrot_training - Step 37469: {'lr': 0.0004324273185686891, 'samples': 19184640, 'steps': 37469, 'loss/train': 1.7030160427093506} +03/05/2022 09:30:40 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 09:30:43 - INFO - codeparrot_training - Step 37470: {'lr': 0.00043242369000112365, 'samples': 19185152, 'steps': 37470, 'loss/train': 1.3260201215744019} +03/05/2022 09:30:46 - INFO - codeparrot_training - Step 37471: {'lr': 0.00043242006135136093, 'samples': 19185664, 'steps': 37471, 'loss/train': 1.7834943532943726} +03/05/2022 09:30:48 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/05/2022 09:30:51 - INFO - codeparrot_training - Step 37472: {'lr': 0.00043241643261940246, 'samples': 19186176, 'steps': 37472, 'loss/train': 1.5661413669586182} +03/05/2022 09:30:54 - INFO - codeparrot_training - Step 37473: {'lr': 0.00043241280380525003, 'samples': 19186688, 'steps': 37473, 'loss/train': 1.5868439674377441} +03/05/2022 09:30:56 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/05/2022 09:31:00 - INFO - codeparrot_training - Step 37474: {'lr': 0.0004324091749089052, 'samples': 19187200, 'steps': 37474, 'loss/train': 2.5541281700134277} +03/05/2022 09:31:03 - INFO - codeparrot_training - Step 37475: {'lr': 0.0004324055459303696, 'samples': 19187712, 'steps': 37475, 'loss/train': 1.5561670064926147} +03/05/2022 09:31:05 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 09:31:08 - INFO - codeparrot_training - Step 37476: {'lr': 0.00043240191686964494, 'samples': 19188224, 'steps': 37476, 'loss/train': 1.0318560600280762} +03/05/2022 09:31:11 - INFO - codeparrot_training - Step 37477: {'lr': 0.00043239828772673276, 'samples': 19188736, 'steps': 37477, 'loss/train': 0.7365391850471497} +03/05/2022 09:31:13 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/05/2022 09:31:17 - INFO - codeparrot_training - Step 37478: {'lr': 0.0004323946585016347, 'samples': 19189248, 'steps': 37478, 'loss/train': 2.1114883422851562} +03/05/2022 09:31:20 - INFO - codeparrot_training - Step 37479: {'lr': 0.00043239102919435235, 'samples': 19189760, 'steps': 37479, 'loss/train': 2.0043885707855225} +03/05/2022 09:31:22 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 09:31:26 - INFO - codeparrot_training - Step 37480: {'lr': 0.0004323873998048875, 'samples': 19190272, 'steps': 37480, 'loss/train': 2.0010740756988525} +03/05/2022 09:31:29 - INFO - codeparrot_training - Step 37481: {'lr': 0.00043238377033324175, 'samples': 19190784, 'steps': 37481, 'loss/train': 0.9142195582389832} +03/05/2022 09:31:32 - INFO - codeparrot_training - Step 37482: {'lr': 0.00043238014077941656, 'samples': 19191296, 'steps': 37482, 'loss/train': 2.0439987182617188} +03/05/2022 09:31:33 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 09:31:37 - INFO - codeparrot_training - Step 37483: {'lr': 0.00043237651114341383, 'samples': 19191808, 'steps': 37483, 'loss/train': 2.0177478790283203} +03/05/2022 09:31:40 - INFO - codeparrot_training - Step 37484: {'lr': 0.00043237288142523503, 'samples': 19192320, 'steps': 37484, 'loss/train': 1.9741607904434204} +03/05/2022 09:31:42 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 09:31:46 - INFO - codeparrot_training - Step 37485: {'lr': 0.00043236925162488173, 'samples': 19192832, 'steps': 37485, 'loss/train': 1.295021653175354} +03/05/2022 09:31:49 - INFO - codeparrot_training - Step 37486: {'lr': 0.0004323656217423557, 'samples': 19193344, 'steps': 37486, 'loss/train': 1.6139222383499146} +03/05/2022 09:31:50 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 09:31:54 - INFO - codeparrot_training - Step 37487: {'lr': 0.00043236199177765856, 'samples': 19193856, 'steps': 37487, 'loss/train': 1.3925421237945557} +03/05/2022 09:31:57 - INFO - codeparrot_training - Step 37488: {'lr': 0.0004323583617307919, 'samples': 19194368, 'steps': 37488, 'loss/train': 1.7477308511734009} +03/05/2022 09:31:59 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/05/2022 09:32:03 - INFO - codeparrot_training - Step 37489: {'lr': 0.00043235473160175745, 'samples': 19194880, 'steps': 37489, 'loss/train': 0.7363441586494446} +03/05/2022 09:32:06 - INFO - codeparrot_training - Step 37490: {'lr': 0.0004323511013905567, 'samples': 19195392, 'steps': 37490, 'loss/train': 1.5590685606002808} +03/05/2022 09:32:07 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 09:32:11 - INFO - codeparrot_training - Step 37491: {'lr': 0.0004323474710971913, 'samples': 19195904, 'steps': 37491, 'loss/train': 0.8319467902183533} +03/05/2022 09:32:14 - INFO - codeparrot_training - Step 37492: {'lr': 0.0004323438407216631, 'samples': 19196416, 'steps': 37492, 'loss/train': 1.8525711297988892} +03/05/2022 09:32:16 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 09:32:19 - INFO - codeparrot_training - Step 37493: {'lr': 0.0004323402102639734, 'samples': 19196928, 'steps': 37493, 'loss/train': 0.8309668898582458} +03/05/2022 09:32:23 - INFO - codeparrot_training - Step 37494: {'lr': 0.00043233657972412414, 'samples': 19197440, 'steps': 37494, 'loss/train': 1.7445950508117676} +03/05/2022 09:32:24 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/05/2022 09:32:28 - INFO - codeparrot_training - Step 37495: {'lr': 0.00043233294910211684, 'samples': 19197952, 'steps': 37495, 'loss/train': 1.6145466566085815} +03/05/2022 09:32:31 - INFO - codeparrot_training - Step 37496: {'lr': 0.0004323293183979531, 'samples': 19198464, 'steps': 37496, 'loss/train': 2.1352810859680176} +03/05/2022 09:32:33 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 09:32:36 - INFO - codeparrot_training - Step 37497: {'lr': 0.0004323256876116345, 'samples': 19198976, 'steps': 37497, 'loss/train': 1.6139819622039795} +03/05/2022 09:32:40 - INFO - codeparrot_training - Step 37498: {'lr': 0.0004323220567431628, 'samples': 19199488, 'steps': 37498, 'loss/train': 1.8670103549957275} +03/05/2022 09:32:41 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 09:32:45 - INFO - codeparrot_training - Step 37499: {'lr': 0.0004323184257925397, 'samples': 19200000, 'steps': 37499, 'loss/train': 2.119373083114624} +03/05/2022 09:32:48 - INFO - codeparrot_training - Step 37500: {'lr': 0.0004323147947597667, 'samples': 19200512, 'steps': 37500, 'loss/train': 1.26805579662323} +03/05/2022 09:32:49 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 09:32:53 - INFO - codeparrot_training - Step 37501: {'lr': 0.00043231116364484534, 'samples': 19201024, 'steps': 37501, 'loss/train': 1.3774335384368896} +03/05/2022 09:32:56 - INFO - codeparrot_training - Step 37502: {'lr': 0.00043230753244777743, 'samples': 19201536, 'steps': 37502, 'loss/train': 1.7888596057891846} +03/05/2022 09:32:58 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/05/2022 09:33:02 - INFO - codeparrot_training - Step 37503: {'lr': 0.00043230390116856467, 'samples': 19202048, 'steps': 37503, 'loss/train': 1.1760632991790771} +03/05/2022 09:33:05 - INFO - codeparrot_training - Step 37504: {'lr': 0.00043230026980720847, 'samples': 19202560, 'steps': 37504, 'loss/train': 1.0543557405471802} +03/05/2022 09:33:07 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 09:33:10 - INFO - codeparrot_training - Step 37505: {'lr': 0.00043229663836371056, 'samples': 19203072, 'steps': 37505, 'loss/train': 2.2733519077301025} +03/05/2022 09:33:13 - INFO - codeparrot_training - Step 37506: {'lr': 0.0004322930068380727, 'samples': 19203584, 'steps': 37506, 'loss/train': 1.0625003576278687} +03/05/2022 09:33:15 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 09:33:19 - INFO - codeparrot_training - Step 37507: {'lr': 0.00043228937523029636, 'samples': 19204096, 'steps': 37507, 'loss/train': 2.0091893672943115} +03/05/2022 09:33:22 - INFO - codeparrot_training - Step 37508: {'lr': 0.00043228574354038326, 'samples': 19204608, 'steps': 37508, 'loss/train': 1.6610870361328125} +03/05/2022 09:33:23 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 09:33:27 - INFO - codeparrot_training - Step 37509: {'lr': 0.00043228211176833496, 'samples': 19205120, 'steps': 37509, 'loss/train': 2.087128162384033} +03/05/2022 09:33:30 - INFO - codeparrot_training - Step 37510: {'lr': 0.00043227847991415326, 'samples': 19205632, 'steps': 37510, 'loss/train': 0.7618759870529175} +03/05/2022 09:33:32 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/05/2022 09:33:36 - INFO - codeparrot_training - Step 37511: {'lr': 0.00043227484797783965, 'samples': 19206144, 'steps': 37511, 'loss/train': 1.8809733390808105} +03/05/2022 09:33:39 - INFO - codeparrot_training - Step 37512: {'lr': 0.0004322712159593958, 'samples': 19206656, 'steps': 37512, 'loss/train': 1.8175616264343262} +03/05/2022 09:33:40 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 09:33:44 - INFO - codeparrot_training - Step 37513: {'lr': 0.0004322675838588234, 'samples': 19207168, 'steps': 37513, 'loss/train': 1.6241227388381958} +03/05/2022 09:33:47 - INFO - codeparrot_training - Step 37514: {'lr': 0.0004322639516761239, 'samples': 19207680, 'steps': 37514, 'loss/train': 1.305985927581787} +03/05/2022 09:33:48 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 09:33:53 - INFO - codeparrot_training - Step 37515: {'lr': 0.0004322603194112992, 'samples': 19208192, 'steps': 37515, 'loss/train': 2.0368423461914062} +03/05/2022 09:33:56 - INFO - codeparrot_training - Step 37516: {'lr': 0.00043225668706435073, 'samples': 19208704, 'steps': 37516, 'loss/train': 1.499269962310791} +03/05/2022 09:33:57 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 09:34:01 - INFO - codeparrot_training - Step 37517: {'lr': 0.0004322530546352803, 'samples': 19209216, 'steps': 37517, 'loss/train': 1.9702339172363281} +03/05/2022 09:34:04 - INFO - codeparrot_training - Step 37518: {'lr': 0.0004322494221240894, 'samples': 19209728, 'steps': 37518, 'loss/train': 0.7950683832168579} +03/05/2022 09:34:05 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 09:34:09 - INFO - codeparrot_training - Step 37519: {'lr': 0.0004322457895307797, 'samples': 19210240, 'steps': 37519, 'loss/train': 1.7383122444152832} +03/05/2022 09:34:13 - INFO - codeparrot_training - Step 37520: {'lr': 0.00043224215685535287, 'samples': 19210752, 'steps': 37520, 'loss/train': 0.86811363697052} +03/05/2022 09:34:14 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/05/2022 09:34:18 - INFO - codeparrot_training - Step 37521: {'lr': 0.0004322385240978106, 'samples': 19211264, 'steps': 37521, 'loss/train': 1.3301078081130981} +03/05/2022 09:34:21 - INFO - codeparrot_training - Step 37522: {'lr': 0.0004322348912581544, 'samples': 19211776, 'steps': 37522, 'loss/train': 1.0161796808242798} +03/05/2022 09:34:25 - INFO - codeparrot_training - Step 37523: {'lr': 0.000432231258336386, 'samples': 19212288, 'steps': 37523, 'loss/train': 0.1943114995956421} +03/05/2022 09:34:25 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/05/2022 09:34:30 - INFO - codeparrot_training - Step 37524: {'lr': 0.000432227625332507, 'samples': 19212800, 'steps': 37524, 'loss/train': 1.4971718788146973} +03/05/2022 09:34:33 - INFO - codeparrot_training - Step 37525: {'lr': 0.000432223992246519, 'samples': 19213312, 'steps': 37525, 'loss/train': 1.5043072700500488} +03/05/2022 09:34:33 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/05/2022 09:34:38 - INFO - codeparrot_training - Step 37526: {'lr': 0.0004322203590784237, 'samples': 19213824, 'steps': 37526, 'loss/train': 2.3293063640594482} +03/05/2022 09:34:42 - INFO - codeparrot_training - Step 37527: {'lr': 0.0004322167258282228, 'samples': 19214336, 'steps': 37527, 'loss/train': 1.7127904891967773} +03/05/2022 09:34:47 - INFO - codeparrot_training - Step 37528: {'lr': 0.0004322130924959178, 'samples': 19214848, 'steps': 37528, 'loss/train': 2.4229848384857178} +03/05/2022 09:34:50 - INFO - codeparrot_training - Step 37529: {'lr': 0.0004322094590815104, 'samples': 19215360, 'steps': 37529, 'loss/train': 1.373903512954712} +03/05/2022 09:34:50 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/05/2022 09:34:55 - INFO - codeparrot_training - Step 37530: {'lr': 0.00043220582558500223, 'samples': 19215872, 'steps': 37530, 'loss/train': 1.893951177597046} +03/05/2022 09:34:59 - INFO - codeparrot_training - Step 37531: {'lr': 0.00043220219200639485, 'samples': 19216384, 'steps': 37531, 'loss/train': 1.2898346185684204} +03/05/2022 09:34:59 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/05/2022 09:35:04 - INFO - codeparrot_training - Step 37532: {'lr': 0.00043219855834569006, 'samples': 19216896, 'steps': 37532, 'loss/train': 2.3175241947174072} +03/05/2022 09:35:07 - INFO - codeparrot_training - Step 37533: {'lr': 0.00043219492460288937, 'samples': 19217408, 'steps': 37533, 'loss/train': 1.6545515060424805} +03/05/2022 09:35:07 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/05/2022 09:35:12 - INFO - codeparrot_training - Step 37534: {'lr': 0.00043219129077799447, 'samples': 19217920, 'steps': 37534, 'loss/train': 1.0107697248458862} +03/05/2022 09:35:16 - INFO - codeparrot_training - Step 37535: {'lr': 0.000432187656871007, 'samples': 19218432, 'steps': 37535, 'loss/train': 1.9901726245880127} +03/05/2022 09:35:16 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/05/2022 09:35:21 - INFO - codeparrot_training - Step 37536: {'lr': 0.0004321840228819286, 'samples': 19218944, 'steps': 37536, 'loss/train': 1.5239471197128296} +03/05/2022 09:35:24 - INFO - codeparrot_training - Step 37537: {'lr': 0.0004321803888107608, 'samples': 19219456, 'steps': 37537, 'loss/train': 1.41885507106781} +03/05/2022 09:35:24 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/05/2022 09:35:30 - INFO - codeparrot_training - Step 37538: {'lr': 0.0004321767546575054, 'samples': 19219968, 'steps': 37538, 'loss/train': 1.091214895248413} +03/05/2022 09:35:33 - INFO - codeparrot_training - Step 37539: {'lr': 0.000432173120422164, 'samples': 19220480, 'steps': 37539, 'loss/train': 1.9895609617233276} +03/05/2022 09:35:33 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/05/2022 09:35:38 - INFO - codeparrot_training - Step 37540: {'lr': 0.00043216948610473816, 'samples': 19220992, 'steps': 37540, 'loss/train': 1.8979796171188354} +03/05/2022 09:35:41 - INFO - codeparrot_training - Step 37541: {'lr': 0.0004321658517052296, 'samples': 19221504, 'steps': 37541, 'loss/train': 2.4974050521850586} +03/05/2022 09:35:41 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/05/2022 09:35:46 - INFO - codeparrot_training - Step 37542: {'lr': 0.00043216221722363983, 'samples': 19222016, 'steps': 37542, 'loss/train': 1.7077895402908325} +03/05/2022 09:35:50 - INFO - codeparrot_training - Step 37543: {'lr': 0.00043215858265997065, 'samples': 19222528, 'steps': 37543, 'loss/train': 2.459596872329712} +03/05/2022 09:35:50 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 09:35:55 - INFO - codeparrot_training - Step 37544: {'lr': 0.0004321549480142236, 'samples': 19223040, 'steps': 37544, 'loss/train': 1.4157896041870117} +03/05/2022 09:35:58 - INFO - codeparrot_training - Step 37545: {'lr': 0.0004321513132864003, 'samples': 19223552, 'steps': 37545, 'loss/train': 1.6512104272842407} +03/05/2022 09:35:58 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/05/2022 09:36:03 - INFO - codeparrot_training - Step 37546: {'lr': 0.0004321476784765025, 'samples': 19224064, 'steps': 37546, 'loss/train': 1.4934146404266357} +03/05/2022 09:36:07 - INFO - codeparrot_training - Step 37547: {'lr': 0.00043214404358453174, 'samples': 19224576, 'steps': 37547, 'loss/train': 2.1848220825195312} +03/05/2022 09:36:07 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/05/2022 09:36:12 - INFO - codeparrot_training - Step 37548: {'lr': 0.0004321404086104897, 'samples': 19225088, 'steps': 37548, 'loss/train': 1.0849947929382324} +03/05/2022 09:36:15 - INFO - codeparrot_training - Step 37549: {'lr': 0.00043213677355437795, 'samples': 19225600, 'steps': 37549, 'loss/train': 1.5723587274551392} +03/05/2022 09:36:15 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/05/2022 09:36:21 - INFO - codeparrot_training - Step 37550: {'lr': 0.0004321331384161983, 'samples': 19226112, 'steps': 37550, 'loss/train': 1.5870014429092407} +03/05/2022 09:36:23 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/05/2022 09:36:26 - INFO - codeparrot_training - Step 37551: {'lr': 0.00043212950319595215, 'samples': 19226624, 'steps': 37551, 'loss/train': 1.150944709777832} +03/05/2022 09:36:29 - INFO - codeparrot_training - Step 37552: {'lr': 0.0004321258678936413, 'samples': 19227136, 'steps': 37552, 'loss/train': 1.9063167572021484} +03/05/2022 09:36:32 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 09:36:34 - INFO - codeparrot_training - Step 37553: {'lr': 0.00043212223250926727, 'samples': 19227648, 'steps': 37553, 'loss/train': 1.9986827373504639} +03/05/2022 09:36:37 - INFO - codeparrot_training - Step 37554: {'lr': 0.00043211859704283184, 'samples': 19228160, 'steps': 37554, 'loss/train': 1.2354141473770142} +03/05/2022 09:36:40 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 09:36:43 - INFO - codeparrot_training - Step 37555: {'lr': 0.0004321149614943366, 'samples': 19228672, 'steps': 37555, 'loss/train': 2.132129192352295} +03/05/2022 09:36:46 - INFO - codeparrot_training - Step 37556: {'lr': 0.0004321113258637832, 'samples': 19229184, 'steps': 37556, 'loss/train': 1.7102023363113403} +03/05/2022 09:36:48 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/05/2022 09:36:51 - INFO - codeparrot_training - Step 37557: {'lr': 0.0004321076901511731, 'samples': 19229696, 'steps': 37557, 'loss/train': 1.96870756149292} +03/05/2022 09:36:54 - INFO - codeparrot_training - Step 37558: {'lr': 0.0004321040543565082, 'samples': 19230208, 'steps': 37558, 'loss/train': 1.890367031097412} +03/05/2022 09:36:57 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/05/2022 09:36:59 - INFO - codeparrot_training - Step 37559: {'lr': 0.00043210041847979003, 'samples': 19230720, 'steps': 37559, 'loss/train': 1.7443681955337524} +03/05/2022 09:37:03 - INFO - codeparrot_training - Step 37560: {'lr': 0.0004320967825210202, 'samples': 19231232, 'steps': 37560, 'loss/train': 1.8006112575531006} +03/05/2022 09:37:05 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 09:37:08 - INFO - codeparrot_training - Step 37561: {'lr': 0.00043209314648020035, 'samples': 19231744, 'steps': 37561, 'loss/train': 2.002596139907837} +03/05/2022 09:37:11 - INFO - codeparrot_training - Step 37562: {'lr': 0.0004320895103573321, 'samples': 19232256, 'steps': 37562, 'loss/train': 1.851914882659912} +03/05/2022 09:37:14 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 09:37:16 - INFO - codeparrot_training - Step 37563: {'lr': 0.00043208587415241725, 'samples': 19232768, 'steps': 37563, 'loss/train': 1.6936287879943848} +03/05/2022 09:37:20 - INFO - codeparrot_training - Step 37564: {'lr': 0.00043208223786545723, 'samples': 19233280, 'steps': 37564, 'loss/train': 1.9661991596221924} +03/05/2022 09:37:22 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 09:37:25 - INFO - codeparrot_training - Step 37565: {'lr': 0.0004320786014964538, 'samples': 19233792, 'steps': 37565, 'loss/train': 1.6039819717407227} +03/05/2022 09:37:28 - INFO - codeparrot_training - Step 37566: {'lr': 0.0004320749650454085, 'samples': 19234304, 'steps': 37566, 'loss/train': 1.5569870471954346} +03/05/2022 09:37:31 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 09:37:33 - INFO - codeparrot_training - Step 37567: {'lr': 0.0004320713285123231, 'samples': 19234816, 'steps': 37567, 'loss/train': 2.1060752868652344} +03/05/2022 09:37:37 - INFO - codeparrot_training - Step 37568: {'lr': 0.0004320676918971991, 'samples': 19235328, 'steps': 37568, 'loss/train': 2.1094367504119873} +03/05/2022 09:37:39 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/05/2022 09:37:42 - INFO - codeparrot_training - Step 37569: {'lr': 0.00043206405520003824, 'samples': 19235840, 'steps': 37569, 'loss/train': 1.2407015562057495} +03/05/2022 09:37:45 - INFO - codeparrot_training - Step 37570: {'lr': 0.00043206041842084214, 'samples': 19236352, 'steps': 37570, 'loss/train': 1.5935955047607422} +03/05/2022 09:37:48 - INFO - codeparrot_training - Step 37571: {'lr': 0.00043205678155961244, 'samples': 19236864, 'steps': 37571, 'loss/train': 0.7627046704292297} +03/05/2022 09:37:48 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 09:37:54 - INFO - codeparrot_training - Step 37572: {'lr': 0.0004320531446163507, 'samples': 19237376, 'steps': 37572, 'loss/train': 1.795957088470459} +03/05/2022 09:37:57 - INFO - codeparrot_training - Step 37573: {'lr': 0.00043204950759105865, 'samples': 19237888, 'steps': 37573, 'loss/train': 1.1871663331985474} +03/05/2022 09:37:57 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/05/2022 09:38:02 - INFO - codeparrot_training - Step 37574: {'lr': 0.0004320458704837379, 'samples': 19238400, 'steps': 37574, 'loss/train': 1.407565951347351} +03/05/2022 09:38:05 - INFO - codeparrot_training - Step 37575: {'lr': 0.00043204223329439015, 'samples': 19238912, 'steps': 37575, 'loss/train': 1.3326168060302734} +03/05/2022 09:38:05 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/05/2022 09:38:10 - INFO - codeparrot_training - Step 37576: {'lr': 0.00043203859602301695, 'samples': 19239424, 'steps': 37576, 'loss/train': 1.0775507688522339} +03/05/2022 09:38:14 - INFO - codeparrot_training - Step 37577: {'lr': 0.00043203495866961996, 'samples': 19239936, 'steps': 37577, 'loss/train': 1.785317063331604} +03/05/2022 09:38:14 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 09:38:19 - INFO - codeparrot_training - Step 37578: {'lr': 0.00043203132123420074, 'samples': 19240448, 'steps': 37578, 'loss/train': 3.5797009468078613} +03/05/2022 09:38:22 - INFO - codeparrot_training - Step 37579: {'lr': 0.00043202768371676113, 'samples': 19240960, 'steps': 37579, 'loss/train': 2.3938326835632324} +03/05/2022 09:38:22 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/05/2022 09:38:27 - INFO - codeparrot_training - Step 37580: {'lr': 0.0004320240461173026, 'samples': 19241472, 'steps': 37580, 'loss/train': 2.0764517784118652} +03/05/2022 09:38:31 - INFO - codeparrot_training - Step 37581: {'lr': 0.00043202040843582685, 'samples': 19241984, 'steps': 37581, 'loss/train': 1.7297992706298828} +03/05/2022 09:38:31 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/05/2022 09:38:36 - INFO - codeparrot_training - Step 37582: {'lr': 0.00043201677067233554, 'samples': 19242496, 'steps': 37582, 'loss/train': 1.4367398023605347} +03/05/2022 09:38:39 - INFO - codeparrot_training - Step 37583: {'lr': 0.00043201313282683024, 'samples': 19243008, 'steps': 37583, 'loss/train': 1.9547196626663208} +03/05/2022 09:38:39 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/05/2022 09:38:44 - INFO - codeparrot_training - Step 37584: {'lr': 0.0004320094948993127, 'samples': 19243520, 'steps': 37584, 'loss/train': 1.7415258884429932} +03/05/2022 09:38:47 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 09:38:49 - INFO - codeparrot_training - Step 37585: {'lr': 0.00043200585688978445, 'samples': 19244032, 'steps': 37585, 'loss/train': 1.7008999586105347} +03/05/2022 09:38:53 - INFO - codeparrot_training - Step 37586: {'lr': 0.00043200221879824706, 'samples': 19244544, 'steps': 37586, 'loss/train': 2.7623813152313232} +03/05/2022 09:38:55 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 09:38:58 - INFO - codeparrot_training - Step 37587: {'lr': 0.0004319985806247024, 'samples': 19245056, 'steps': 37587, 'loss/train': 1.3866840600967407} +03/05/2022 09:39:01 - INFO - codeparrot_training - Step 37588: {'lr': 0.00043199494236915206, 'samples': 19245568, 'steps': 37588, 'loss/train': 1.6045198440551758} +03/05/2022 09:39:04 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 09:39:06 - INFO - codeparrot_training - Step 37589: {'lr': 0.0004319913040315975, 'samples': 19246080, 'steps': 37589, 'loss/train': 2.2629575729370117} +03/05/2022 09:39:10 - INFO - codeparrot_training - Step 37590: {'lr': 0.00043198766561204047, 'samples': 19246592, 'steps': 37590, 'loss/train': 1.629794716835022} +03/05/2022 09:39:12 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/05/2022 09:39:15 - INFO - codeparrot_training - Step 37591: {'lr': 0.0004319840271104826, 'samples': 19247104, 'steps': 37591, 'loss/train': 1.5631990432739258} +03/05/2022 09:39:18 - INFO - codeparrot_training - Step 37592: {'lr': 0.0004319803885269256, 'samples': 19247616, 'steps': 37592, 'loss/train': 1.793030858039856} +03/05/2022 09:39:21 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/05/2022 09:39:23 - INFO - codeparrot_training - Step 37593: {'lr': 0.0004319767498613709, 'samples': 19248128, 'steps': 37593, 'loss/train': 0.9188294410705566} +03/05/2022 09:39:27 - INFO - codeparrot_training - Step 37594: {'lr': 0.00043197311111382045, 'samples': 19248640, 'steps': 37594, 'loss/train': 1.8722147941589355} +03/05/2022 09:39:29 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/05/2022 09:39:32 - INFO - codeparrot_training - Step 37595: {'lr': 0.00043196947228427564, 'samples': 19249152, 'steps': 37595, 'loss/train': 0.9693514704704285} +03/05/2022 09:39:35 - INFO - codeparrot_training - Step 37596: {'lr': 0.0004319658333727382, 'samples': 19249664, 'steps': 37596, 'loss/train': 1.1033308506011963} +03/05/2022 09:39:37 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/05/2022 09:39:40 - INFO - codeparrot_training - Step 37597: {'lr': 0.0004319621943792098, 'samples': 19250176, 'steps': 37597, 'loss/train': 1.750913381576538} +03/05/2022 09:39:43 - INFO - codeparrot_training - Step 37598: {'lr': 0.000431958555303692, 'samples': 19250688, 'steps': 37598, 'loss/train': 1.601717472076416} +03/05/2022 09:39:45 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 09:39:49 - INFO - codeparrot_training - Step 37599: {'lr': 0.00043195491614618655, 'samples': 19251200, 'steps': 37599, 'loss/train': 1.671321153640747} +03/05/2022 09:39:52 - INFO - codeparrot_training - Step 37600: {'lr': 0.00043195127690669486, 'samples': 19251712, 'steps': 37600, 'loss/train': 2.0962648391723633} +03/05/2022 09:39:54 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 09:39:57 - INFO - codeparrot_training - Step 37601: {'lr': 0.00043194763758521896, 'samples': 19252224, 'steps': 37601, 'loss/train': 1.908949375152588} +03/05/2022 09:40:00 - INFO - codeparrot_training - Step 37602: {'lr': 0.00043194399818176013, 'samples': 19252736, 'steps': 37602, 'loss/train': 2.277860403060913} +03/05/2022 09:40:02 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 09:40:06 - INFO - codeparrot_training - Step 37603: {'lr': 0.00043194035869632017, 'samples': 19253248, 'steps': 37603, 'loss/train': 1.6188175678253174} +03/05/2022 09:40:09 - INFO - codeparrot_training - Step 37604: {'lr': 0.00043193671912890064, 'samples': 19253760, 'steps': 37604, 'loss/train': 0.15518000721931458} +03/05/2022 09:40:11 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 09:40:14 - INFO - codeparrot_training - Step 37605: {'lr': 0.0004319330794795033, 'samples': 19254272, 'steps': 37605, 'loss/train': 2.093111515045166} +03/05/2022 09:40:17 - INFO - codeparrot_training - Step 37606: {'lr': 0.0004319294397481297, 'samples': 19254784, 'steps': 37606, 'loss/train': 0.5295997262001038} +03/05/2022 09:40:19 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/05/2022 09:40:22 - INFO - codeparrot_training - Step 37607: {'lr': 0.0004319257999347815, 'samples': 19255296, 'steps': 37607, 'loss/train': 2.154928207397461} +03/05/2022 09:40:26 - INFO - codeparrot_training - Step 37608: {'lr': 0.0004319221600394603, 'samples': 19255808, 'steps': 37608, 'loss/train': 1.4473485946655273} +03/05/2022 09:40:27 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/05/2022 09:40:31 - INFO - codeparrot_training - Step 37609: {'lr': 0.0004319185200621678, 'samples': 19256320, 'steps': 37609, 'loss/train': 1.3819894790649414} +03/05/2022 09:40:34 - INFO - codeparrot_training - Step 37610: {'lr': 0.0004319148800029057, 'samples': 19256832, 'steps': 37610, 'loss/train': 1.1123124361038208} +03/05/2022 09:40:36 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/05/2022 09:40:39 - INFO - codeparrot_training - Step 37611: {'lr': 0.0004319112398616755, 'samples': 19257344, 'steps': 37611, 'loss/train': 2.336869716644287} +03/05/2022 09:40:43 - INFO - codeparrot_training - Step 37612: {'lr': 0.00043190759963847894, 'samples': 19257856, 'steps': 37612, 'loss/train': 1.7621959447860718} +03/05/2022 09:40:44 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/05/2022 09:40:48 - INFO - codeparrot_training - Step 37613: {'lr': 0.00043190395933331757, 'samples': 19258368, 'steps': 37613, 'loss/train': 2.209055185317993} +03/05/2022 09:40:51 - INFO - codeparrot_training - Step 37614: {'lr': 0.00043190031894619306, 'samples': 19258880, 'steps': 37614, 'loss/train': 1.5879660844802856} +03/05/2022 09:40:53 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/05/2022 09:40:56 - INFO - codeparrot_training - Step 37615: {'lr': 0.0004318966784771071, 'samples': 19259392, 'steps': 37615, 'loss/train': 1.498449444770813} +03/05/2022 09:40:59 - INFO - codeparrot_training - Step 37616: {'lr': 0.00043189303792606136, 'samples': 19259904, 'steps': 37616, 'loss/train': 2.973716974258423} +03/05/2022 09:41:02 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 09:41:05 - INFO - codeparrot_training - Step 37617: {'lr': 0.0004318893972930574, 'samples': 19260416, 'steps': 37617, 'loss/train': 1.7208435535430908} +03/05/2022 09:41:08 - INFO - codeparrot_training - Step 37618: {'lr': 0.00043188575657809685, 'samples': 19260928, 'steps': 37618, 'loss/train': 1.855859637260437} +03/05/2022 09:41:10 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 09:41:13 - INFO - codeparrot_training - Step 37619: {'lr': 0.00043188211578118143, 'samples': 19261440, 'steps': 37619, 'loss/train': 0.889819324016571} +03/05/2022 09:41:16 - INFO - codeparrot_training - Step 37620: {'lr': 0.0004318784749023127, 'samples': 19261952, 'steps': 37620, 'loss/train': 1.6071275472640991} +03/05/2022 09:41:19 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 09:41:22 - INFO - codeparrot_training - Step 37621: {'lr': 0.0004318748339414923, 'samples': 19262464, 'steps': 37621, 'loss/train': 1.5687029361724854} +03/05/2022 09:41:25 - INFO - codeparrot_training - Step 37622: {'lr': 0.000431871192898722, 'samples': 19262976, 'steps': 37622, 'loss/train': 1.8093433380126953} +03/05/2022 09:41:28 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 09:41:30 - INFO - codeparrot_training - Step 37623: {'lr': 0.0004318675517740033, 'samples': 19263488, 'steps': 37623, 'loss/train': 0.7689558267593384} +03/05/2022 09:41:33 - INFO - codeparrot_training - Step 37624: {'lr': 0.0004318639105673379, 'samples': 19264000, 'steps': 37624, 'loss/train': 2.152623414993286} +03/05/2022 09:41:36 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/05/2022 09:41:39 - INFO - codeparrot_training - Step 37625: {'lr': 0.00043186026927872736, 'samples': 19264512, 'steps': 37625, 'loss/train': 0.4563499391078949} +03/05/2022 09:41:42 - INFO - codeparrot_training - Step 37626: {'lr': 0.0004318566279081735, 'samples': 19265024, 'steps': 37626, 'loss/train': 2.188025951385498} +03/05/2022 09:41:44 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 09:41:47 - INFO - codeparrot_training - Step 37627: {'lr': 0.0004318529864556777, 'samples': 19265536, 'steps': 37627, 'loss/train': 0.14094699919223785} +03/05/2022 09:41:50 - INFO - codeparrot_training - Step 37628: {'lr': 0.0004318493449212419, 'samples': 19266048, 'steps': 37628, 'loss/train': 2.102076768875122} +03/05/2022 09:41:52 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/05/2022 09:41:56 - INFO - codeparrot_training - Step 37629: {'lr': 0.00043184570330486756, 'samples': 19266560, 'steps': 37629, 'loss/train': 1.6116771697998047} +03/05/2022 09:41:59 - INFO - codeparrot_training - Step 37630: {'lr': 0.0004318420616065563, 'samples': 19267072, 'steps': 37630, 'loss/train': 2.1827826499938965} +03/05/2022 09:42:02 - INFO - codeparrot_training - Step 37631: {'lr': 0.0004318384198263099, 'samples': 19267584, 'steps': 37631, 'loss/train': 1.6048632860183716} +03/05/2022 09:42:03 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 09:42:07 - INFO - codeparrot_training - Step 37632: {'lr': 0.0004318347779641298, 'samples': 19268096, 'steps': 37632, 'loss/train': 2.236224412918091} +03/05/2022 09:42:11 - INFO - codeparrot_training - Step 37633: {'lr': 0.00043183113602001777, 'samples': 19268608, 'steps': 37633, 'loss/train': 1.839824914932251} +03/05/2022 09:42:12 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/05/2022 09:42:16 - INFO - codeparrot_training - Step 37634: {'lr': 0.0004318274939939755, 'samples': 19269120, 'steps': 37634, 'loss/train': 1.442491888999939} +03/05/2022 09:42:19 - INFO - codeparrot_training - Step 37635: {'lr': 0.00043182385188600457, 'samples': 19269632, 'steps': 37635, 'loss/train': 2.23506498336792} +03/05/2022 09:42:20 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/05/2022 09:42:24 - INFO - codeparrot_training - Step 37636: {'lr': 0.0004318202096961066, 'samples': 19270144, 'steps': 37636, 'loss/train': 1.7184643745422363} +03/05/2022 09:42:28 - INFO - codeparrot_training - Step 37637: {'lr': 0.0004318165674242832, 'samples': 19270656, 'steps': 37637, 'loss/train': 1.5109783411026} +03/05/2022 09:42:29 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/05/2022 09:42:33 - INFO - codeparrot_training - Step 37638: {'lr': 0.0004318129250705361, 'samples': 19271168, 'steps': 37638, 'loss/train': 1.7208701372146606} +03/05/2022 09:42:36 - INFO - codeparrot_training - Step 37639: {'lr': 0.0004318092826348669, 'samples': 19271680, 'steps': 37639, 'loss/train': 2.121558427810669} +03/05/2022 09:42:37 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 09:42:41 - INFO - codeparrot_training - Step 37640: {'lr': 0.0004318056401172772, 'samples': 19272192, 'steps': 37640, 'loss/train': 2.3769032955169678} +03/05/2022 09:42:45 - INFO - codeparrot_training - Step 37641: {'lr': 0.0004318019975177688, 'samples': 19272704, 'steps': 37641, 'loss/train': 2.73527193069458} +03/05/2022 09:42:45 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/05/2022 09:42:50 - INFO - codeparrot_training - Step 37642: {'lr': 0.0004317983548363431, 'samples': 19273216, 'steps': 37642, 'loss/train': 2.0738365650177} +03/05/2022 09:42:53 - INFO - codeparrot_training - Step 37643: {'lr': 0.0004317947120730019, 'samples': 19273728, 'steps': 37643, 'loss/train': 1.6203736066818237} +03/05/2022 09:42:54 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/05/2022 09:42:58 - INFO - codeparrot_training - Step 37644: {'lr': 0.0004317910692277469, 'samples': 19274240, 'steps': 37644, 'loss/train': 1.735791802406311} +03/05/2022 09:43:01 - INFO - codeparrot_training - Step 37645: {'lr': 0.0004317874263005795, 'samples': 19274752, 'steps': 37645, 'loss/train': 1.9624476432800293} +03/05/2022 09:43:02 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/05/2022 09:43:07 - INFO - codeparrot_training - Step 37646: {'lr': 0.0004317837832915016, 'samples': 19275264, 'steps': 37646, 'loss/train': 2.015181541442871} +03/05/2022 09:43:10 - INFO - codeparrot_training - Step 37647: {'lr': 0.0004317801402005147, 'samples': 19275776, 'steps': 37647, 'loss/train': 1.9798939228057861} +03/05/2022 09:43:10 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 09:43:15 - INFO - codeparrot_training - Step 37648: {'lr': 0.00043177649702762043, 'samples': 19276288, 'steps': 37648, 'loss/train': 1.3488126993179321} +03/05/2022 09:43:18 - INFO - codeparrot_training - Step 37649: {'lr': 0.0004317728537728206, 'samples': 19276800, 'steps': 37649, 'loss/train': 1.7996190786361694} +03/05/2022 09:43:18 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 09:43:23 - INFO - codeparrot_training - Step 37650: {'lr': 0.0004317692104361166, 'samples': 19277312, 'steps': 37650, 'loss/train': 2.3607280254364014} +03/05/2022 09:43:27 - INFO - codeparrot_training - Step 37651: {'lr': 0.0004317655670175102, 'samples': 19277824, 'steps': 37651, 'loss/train': 1.8736376762390137} +03/05/2022 09:43:27 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/05/2022 09:43:32 - INFO - codeparrot_training - Step 37652: {'lr': 0.0004317619235170032, 'samples': 19278336, 'steps': 37652, 'loss/train': 1.449759602546692} +03/05/2022 09:43:35 - INFO - codeparrot_training - Step 37653: {'lr': 0.00043175827993459696, 'samples': 19278848, 'steps': 37653, 'loss/train': 2.090639352798462} +03/05/2022 09:43:35 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/05/2022 09:43:40 - INFO - codeparrot_training - Step 37654: {'lr': 0.0004317546362702932, 'samples': 19279360, 'steps': 37654, 'loss/train': 0.8621086478233337} +03/05/2022 09:43:43 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/05/2022 09:43:46 - INFO - codeparrot_training - Step 37655: {'lr': 0.0004317509925240937, 'samples': 19279872, 'steps': 37655, 'loss/train': 1.5566388368606567} +03/05/2022 09:43:49 - INFO - codeparrot_training - Step 37656: {'lr': 0.00043174734869599993, 'samples': 19280384, 'steps': 37656, 'loss/train': 2.6040854454040527} +03/05/2022 09:43:52 - INFO - codeparrot_training - Step 37657: {'lr': 0.0004317437047860137, 'samples': 19280896, 'steps': 37657, 'loss/train': 1.002683401107788} +03/05/2022 09:43:52 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/05/2022 09:43:57 - INFO - codeparrot_training - Step 37658: {'lr': 0.0004317400607941364, 'samples': 19281408, 'steps': 37658, 'loss/train': 1.2695149183273315} +03/05/2022 09:44:00 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/05/2022 09:44:03 - INFO - codeparrot_training - Step 37659: {'lr': 0.00043173641672037, 'samples': 19281920, 'steps': 37659, 'loss/train': 2.4506208896636963} +03/05/2022 09:44:06 - INFO - codeparrot_training - Step 37660: {'lr': 0.00043173277256471586, 'samples': 19282432, 'steps': 37660, 'loss/train': 1.2594867944717407} +03/05/2022 09:44:09 - INFO - codeparrot_training - Step 37661: {'lr': 0.0004317291283271758, 'samples': 19282944, 'steps': 37661, 'loss/train': 2.516892194747925} +03/05/2022 09:44:09 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/05/2022 09:44:15 - INFO - codeparrot_training - Step 37662: {'lr': 0.0004317254840077514, 'samples': 19283456, 'steps': 37662, 'loss/train': 2.185739278793335} +03/05/2022 09:44:17 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/05/2022 09:44:20 - INFO - codeparrot_training - Step 37663: {'lr': 0.0004317218396064443, 'samples': 19283968, 'steps': 37663, 'loss/train': 2.31020188331604} +03/05/2022 09:44:23 - INFO - codeparrot_training - Step 37664: {'lr': 0.00043171819512325614, 'samples': 19284480, 'steps': 37664, 'loss/train': 1.3380119800567627} +03/05/2022 09:44:26 - INFO - codeparrot_training - Step 37665: {'lr': 0.00043171455055818854, 'samples': 19284992, 'steps': 37665, 'loss/train': 0.15655094385147095} +03/05/2022 09:44:26 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/05/2022 09:44:32 - INFO - codeparrot_training - Step 37666: {'lr': 0.0004317109059112432, 'samples': 19285504, 'steps': 37666, 'loss/train': 1.5209444761276245} +03/05/2022 09:44:35 - INFO - codeparrot_training - Step 37667: {'lr': 0.00043170726118242164, 'samples': 19286016, 'steps': 37667, 'loss/train': 1.9107359647750854} +03/05/2022 09:44:35 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 09:44:40 - INFO - codeparrot_training - Step 37668: {'lr': 0.0004317036163717257, 'samples': 19286528, 'steps': 37668, 'loss/train': 2.0399527549743652} +03/05/2022 09:44:43 - INFO - codeparrot_training - Step 37669: {'lr': 0.0004316999714791569, 'samples': 19287040, 'steps': 37669, 'loss/train': 1.8827881813049316} +03/05/2022 09:44:43 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 09:44:48 - INFO - codeparrot_training - Step 37670: {'lr': 0.0004316963265047169, 'samples': 19287552, 'steps': 37670, 'loss/train': 1.4759833812713623} +03/05/2022 09:44:51 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/05/2022 09:44:54 - INFO - codeparrot_training - Step 37671: {'lr': 0.00043169268144840726, 'samples': 19288064, 'steps': 37671, 'loss/train': 1.2252402305603027} +03/05/2022 09:44:57 - INFO - codeparrot_training - Step 37672: {'lr': 0.0004316890363102298, 'samples': 19288576, 'steps': 37672, 'loss/train': 2.0363316535949707} +03/05/2022 09:45:00 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/05/2022 09:45:02 - INFO - codeparrot_training - Step 37673: {'lr': 0.000431685391090186, 'samples': 19289088, 'steps': 37673, 'loss/train': 2.3227334022521973} +03/05/2022 09:45:05 - INFO - codeparrot_training - Step 37674: {'lr': 0.00043168174578827755, 'samples': 19289600, 'steps': 37674, 'loss/train': 1.842089056968689} +03/05/2022 09:45:08 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/05/2022 09:45:11 - INFO - codeparrot_training - Step 37675: {'lr': 0.00043167810040450617, 'samples': 19290112, 'steps': 37675, 'loss/train': 2.183668375015259} +03/05/2022 09:45:14 - INFO - codeparrot_training - Step 37676: {'lr': 0.00043167445493887347, 'samples': 19290624, 'steps': 37676, 'loss/train': 0.7465007901191711} +03/05/2022 09:45:17 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/05/2022 09:45:19 - INFO - codeparrot_training - Step 37677: {'lr': 0.000431670809391381, 'samples': 19291136, 'steps': 37677, 'loss/train': 0.5453247427940369} +03/05/2022 09:45:22 - INFO - codeparrot_training - Step 37678: {'lr': 0.00043166716376203047, 'samples': 19291648, 'steps': 37678, 'loss/train': 2.140970468521118} +03/05/2022 09:45:25 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/05/2022 09:45:28 - INFO - codeparrot_training - Step 37679: {'lr': 0.0004316635180508235, 'samples': 19292160, 'steps': 37679, 'loss/train': 1.0217266082763672} +03/05/2022 09:45:31 - INFO - codeparrot_training - Step 37680: {'lr': 0.0004316598722577618, 'samples': 19292672, 'steps': 37680, 'loss/train': 2.5661678314208984} +03/05/2022 09:45:34 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 09:45:36 - INFO - codeparrot_training - Step 37681: {'lr': 0.000431656226382847, 'samples': 19293184, 'steps': 37681, 'loss/train': 1.9702166318893433} +03/05/2022 09:45:39 - INFO - codeparrot_training - Step 37682: {'lr': 0.00043165258042608055, 'samples': 19293696, 'steps': 37682, 'loss/train': 1.8913568258285522} +03/05/2022 09:45:42 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/05/2022 09:45:45 - INFO - codeparrot_training - Step 37683: {'lr': 0.0004316489343874644, 'samples': 19294208, 'steps': 37683, 'loss/train': 1.447848916053772} +03/05/2022 09:45:48 - INFO - codeparrot_training - Step 37684: {'lr': 0.000431645288267, 'samples': 19294720, 'steps': 37684, 'loss/train': 1.0389361381530762} +03/05/2022 09:45:51 - INFO - codeparrot_training - Step 37685: {'lr': 0.00043164164206468904, 'samples': 19295232, 'steps': 37685, 'loss/train': 1.556795597076416} +03/05/2022 09:45:51 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/05/2022 09:45:56 - INFO - codeparrot_training - Step 37686: {'lr': 0.00043163799578053313, 'samples': 19295744, 'steps': 37686, 'loss/train': 1.0446492433547974} +03/05/2022 09:45:59 - INFO - codeparrot_training - Step 37687: {'lr': 0.00043163434941453395, 'samples': 19296256, 'steps': 37687, 'loss/train': 1.7491613626480103} +03/05/2022 09:46:00 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/05/2022 09:46:05 - INFO - codeparrot_training - Step 37688: {'lr': 0.00043163070296669317, 'samples': 19296768, 'steps': 37688, 'loss/train': 2.305279016494751} +03/05/2022 09:46:08 - INFO - codeparrot_training - Step 37689: {'lr': 0.00043162705643701236, 'samples': 19297280, 'steps': 37689, 'loss/train': 0.9249650835990906} +03/05/2022 09:46:08 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/05/2022 09:46:13 - INFO - codeparrot_training - Step 37690: {'lr': 0.00043162340982549327, 'samples': 19297792, 'steps': 37690, 'loss/train': 1.4148107767105103} +03/05/2022 09:46:16 - INFO - codeparrot_training - Step 37691: {'lr': 0.00043161976313213735, 'samples': 19298304, 'steps': 37691, 'loss/train': 1.9970656633377075} +03/05/2022 09:46:16 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/05/2022 09:46:21 - INFO - codeparrot_training - Step 37692: {'lr': 0.0004316161163569465, 'samples': 19298816, 'steps': 37692, 'loss/train': 1.244062066078186} +03/05/2022 09:46:25 - INFO - codeparrot_training - Step 37693: {'lr': 0.0004316124694999222, 'samples': 19299328, 'steps': 37693, 'loss/train': 2.284325361251831} +03/05/2022 09:46:25 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 09:46:30 - INFO - codeparrot_training - Step 37694: {'lr': 0.000431608822561066, 'samples': 19299840, 'steps': 37694, 'loss/train': 2.0238091945648193} +03/05/2022 09:46:33 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/05/2022 09:46:35 - INFO - codeparrot_training - Step 37695: {'lr': 0.0004316051755403798, 'samples': 19300352, 'steps': 37695, 'loss/train': 0.8627633452415466} +03/05/2022 09:46:38 - INFO - codeparrot_training - Step 37696: {'lr': 0.000431601528437865, 'samples': 19300864, 'steps': 37696, 'loss/train': 1.9638502597808838} +03/05/2022 09:46:41 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/05/2022 09:46:44 - INFO - codeparrot_training - Step 37697: {'lr': 0.00043159788125352353, 'samples': 19301376, 'steps': 37697, 'loss/train': 1.6519570350646973} +03/05/2022 09:46:47 - INFO - codeparrot_training - Step 37698: {'lr': 0.0004315942339873567, 'samples': 19301888, 'steps': 37698, 'loss/train': 1.934112310409546} +03/05/2022 09:46:49 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/05/2022 09:46:52 - INFO - codeparrot_training - Step 37699: {'lr': 0.00043159058663936635, 'samples': 19302400, 'steps': 37699, 'loss/train': 2.563800096511841} +03/05/2022 09:46:55 - INFO - codeparrot_training - Step 37700: {'lr': 0.0004315869392095542, 'samples': 19302912, 'steps': 37700, 'loss/train': 0.9338539838790894} +03/05/2022 09:46:58 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/05/2022 09:47:01 - INFO - codeparrot_training - Step 37701: {'lr': 0.0004315832916979216, 'samples': 19303424, 'steps': 37701, 'loss/train': 2.3548097610473633} +03/05/2022 09:47:04 - INFO - codeparrot_training - Step 37702: {'lr': 0.00043157964410447047, 'samples': 19303936, 'steps': 37702, 'loss/train': 2.1663315296173096} +03/05/2022 09:47:07 - INFO - codeparrot_training - Step 37703: {'lr': 0.0004315759964292023, 'samples': 19304448, 'steps': 37703, 'loss/train': 1.0427130460739136} +03/05/2022 09:47:08 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 09:47:12 - INFO - codeparrot_training - Step 37704: {'lr': 0.0004315723486721188, 'samples': 19304960, 'steps': 37704, 'loss/train': 2.257753372192383} +03/05/2022 09:47:15 - INFO - codeparrot_training - Step 37705: {'lr': 0.00043156870083322166, 'samples': 19305472, 'steps': 37705, 'loss/train': 2.2345669269561768} +03/05/2022 09:47:16 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 09:47:21 - INFO - codeparrot_training - Step 37706: {'lr': 0.00043156505291251234, 'samples': 19305984, 'steps': 37706, 'loss/train': 1.7618998289108276} +03/05/2022 09:47:24 - INFO - codeparrot_training - Step 37707: {'lr': 0.00043156140490999275, 'samples': 19306496, 'steps': 37707, 'loss/train': 1.9862146377563477} +03/05/2022 09:47:24 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/05/2022 09:47:29 - INFO - codeparrot_training - Step 37708: {'lr': 0.0004315577568256643, 'samples': 19307008, 'steps': 37708, 'loss/train': 1.8448580503463745} +03/05/2022 09:47:32 - INFO - codeparrot_training - Step 37709: {'lr': 0.0004315541086595288, 'samples': 19307520, 'steps': 37709, 'loss/train': 1.2533881664276123} +03/05/2022 09:47:33 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 09:47:38 - INFO - codeparrot_training - Step 37710: {'lr': 0.00043155046041158776, 'samples': 19308032, 'steps': 37710, 'loss/train': 1.3395535945892334} +03/05/2022 09:47:41 - INFO - codeparrot_training - Step 37711: {'lr': 0.0004315468120818429, 'samples': 19308544, 'steps': 37711, 'loss/train': 2.15299129486084} +03/05/2022 09:47:41 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 09:47:46 - INFO - codeparrot_training - Step 37712: {'lr': 0.0004315431636702959, 'samples': 19309056, 'steps': 37712, 'loss/train': 1.5401694774627686} +03/05/2022 09:47:49 - INFO - codeparrot_training - Step 37713: {'lr': 0.00043153951517694824, 'samples': 19309568, 'steps': 37713, 'loss/train': 1.1506346464157104} +03/05/2022 09:47:50 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/05/2022 09:47:55 - INFO - codeparrot_training - Step 37714: {'lr': 0.0004315358666018018, 'samples': 19310080, 'steps': 37714, 'loss/train': 1.6552873849868774} +03/05/2022 09:47:58 - INFO - codeparrot_training - Step 37715: {'lr': 0.00043153221794485795, 'samples': 19310592, 'steps': 37715, 'loss/train': 1.3020331859588623} +03/05/2022 09:47:59 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 09:48:03 - INFO - codeparrot_training - Step 37716: {'lr': 0.0004315285692061186, 'samples': 19311104, 'steps': 37716, 'loss/train': 1.6908332109451294} +03/05/2022 09:48:06 - INFO - codeparrot_training - Step 37717: {'lr': 0.00043152492038558526, 'samples': 19311616, 'steps': 37717, 'loss/train': 1.7883599996566772} +03/05/2022 09:48:07 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 09:48:12 - INFO - codeparrot_training - Step 37718: {'lr': 0.00043152127148325957, 'samples': 19312128, 'steps': 37718, 'loss/train': 1.509145736694336} +03/05/2022 09:48:15 - INFO - codeparrot_training - Step 37719: {'lr': 0.00043151762249914324, 'samples': 19312640, 'steps': 37719, 'loss/train': 1.5560741424560547} +03/05/2022 09:48:15 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/05/2022 09:48:20 - INFO - codeparrot_training - Step 37720: {'lr': 0.00043151397343323784, 'samples': 19313152, 'steps': 37720, 'loss/train': 2.2463555335998535} +03/05/2022 09:48:23 - INFO - codeparrot_training - Step 37721: {'lr': 0.00043151032428554505, 'samples': 19313664, 'steps': 37721, 'loss/train': 1.599293828010559} +03/05/2022 09:48:23 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/05/2022 09:48:29 - INFO - codeparrot_training - Step 37722: {'lr': 0.0004315066750560665, 'samples': 19314176, 'steps': 37722, 'loss/train': 0.6030313372612} +03/05/2022 09:48:32 - INFO - codeparrot_training - Step 37723: {'lr': 0.0004315030257448038, 'samples': 19314688, 'steps': 37723, 'loss/train': 0.8725462555885315} +03/05/2022 09:48:32 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 09:48:37 - INFO - codeparrot_training - Step 37724: {'lr': 0.00043149937635175874, 'samples': 19315200, 'steps': 37724, 'loss/train': 2.121915102005005} +03/05/2022 09:48:40 - INFO - codeparrot_training - Step 37725: {'lr': 0.0004314957268769328, 'samples': 19315712, 'steps': 37725, 'loss/train': 2.5113537311553955} +03/05/2022 09:48:41 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/05/2022 09:48:45 - INFO - codeparrot_training - Step 37726: {'lr': 0.00043149207732032767, 'samples': 19316224, 'steps': 37726, 'loss/train': 1.9718410968780518} +03/05/2022 09:48:49 - INFO - codeparrot_training - Step 37727: {'lr': 0.00043148842768194503, 'samples': 19316736, 'steps': 37727, 'loss/train': 1.9268851280212402} +03/05/2022 09:48:50 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 09:48:54 - INFO - codeparrot_training - Step 37728: {'lr': 0.0004314847779617865, 'samples': 19317248, 'steps': 37728, 'loss/train': 1.3978252410888672} +03/05/2022 09:48:57 - INFO - codeparrot_training - Step 37729: {'lr': 0.00043148112815985377, 'samples': 19317760, 'steps': 37729, 'loss/train': 1.919399380683899} +03/05/2022 09:48:58 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 09:49:02 - INFO - codeparrot_training - Step 37730: {'lr': 0.0004314774782761484, 'samples': 19318272, 'steps': 37730, 'loss/train': 1.531058430671692} +03/05/2022 09:49:05 - INFO - codeparrot_training - Step 37731: {'lr': 0.00043147382831067204, 'samples': 19318784, 'steps': 37731, 'loss/train': 1.4913954734802246} +03/05/2022 09:49:07 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 09:49:11 - INFO - codeparrot_training - Step 37732: {'lr': 0.0004314701782634264, 'samples': 19319296, 'steps': 37732, 'loss/train': 3.1708483695983887} +03/05/2022 09:49:14 - INFO - codeparrot_training - Step 37733: {'lr': 0.0004314665281344132, 'samples': 19319808, 'steps': 37733, 'loss/train': 1.2305701971054077} +03/05/2022 09:49:15 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/05/2022 09:49:19 - INFO - codeparrot_training - Step 37734: {'lr': 0.0004314628779236339, 'samples': 19320320, 'steps': 37734, 'loss/train': 1.623321771621704} +03/05/2022 09:49:22 - INFO - codeparrot_training - Step 37735: {'lr': 0.00043145922763109017, 'samples': 19320832, 'steps': 37735, 'loss/train': 2.278578758239746} +03/05/2022 09:49:24 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 09:49:28 - INFO - codeparrot_training - Step 37736: {'lr': 0.0004314555772567838, 'samples': 19321344, 'steps': 37736, 'loss/train': 1.3956269025802612} +03/05/2022 09:49:31 - INFO - codeparrot_training - Step 37737: {'lr': 0.0004314519268007163, 'samples': 19321856, 'steps': 37737, 'loss/train': 1.6231944561004639} +03/05/2022 09:49:32 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/05/2022 09:49:36 - INFO - codeparrot_training - Step 37738: {'lr': 0.00043144827626288943, 'samples': 19322368, 'steps': 37738, 'loss/train': 0.38999781012535095} +03/05/2022 09:49:39 - INFO - codeparrot_training - Step 37739: {'lr': 0.00043144462564330464, 'samples': 19322880, 'steps': 37739, 'loss/train': 1.5829609632492065} +03/05/2022 09:49:41 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 09:49:44 - INFO - codeparrot_training - Step 37740: {'lr': 0.0004314409749419638, 'samples': 19323392, 'steps': 37740, 'loss/train': 1.763595461845398} +03/05/2022 09:49:48 - INFO - codeparrot_training - Step 37741: {'lr': 0.00043143732415886843, 'samples': 19323904, 'steps': 37741, 'loss/train': 1.9644676446914673} +03/05/2022 09:49:49 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 09:49:53 - INFO - codeparrot_training - Step 37742: {'lr': 0.0004314336732940202, 'samples': 19324416, 'steps': 37742, 'loss/train': 2.000657081604004} +03/05/2022 09:49:56 - INFO - codeparrot_training - Step 37743: {'lr': 0.0004314300223474208, 'samples': 19324928, 'steps': 37743, 'loss/train': 1.6981432437896729} +03/05/2022 09:49:58 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/05/2022 09:50:01 - INFO - codeparrot_training - Step 37744: {'lr': 0.0004314263713190718, 'samples': 19325440, 'steps': 37744, 'loss/train': 1.6584651470184326} +03/05/2022 09:50:05 - INFO - codeparrot_training - Step 37745: {'lr': 0.00043142272020897486, 'samples': 19325952, 'steps': 37745, 'loss/train': 1.766785740852356} +03/05/2022 09:50:07 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 09:50:10 - INFO - codeparrot_training - Step 37746: {'lr': 0.0004314190690171317, 'samples': 19326464, 'steps': 37746, 'loss/train': 0.4846493899822235} +03/05/2022 09:50:13 - INFO - codeparrot_training - Step 37747: {'lr': 0.0004314154177435438, 'samples': 19326976, 'steps': 37747, 'loss/train': 2.2789077758789062} +03/05/2022 09:50:15 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/05/2022 09:50:18 - INFO - codeparrot_training - Step 37748: {'lr': 0.000431411766388213, 'samples': 19327488, 'steps': 37748, 'loss/train': 1.3124229907989502} +03/05/2022 09:50:22 - INFO - codeparrot_training - Step 37749: {'lr': 0.0004314081149511409, 'samples': 19328000, 'steps': 37749, 'loss/train': 1.511587381362915} +03/05/2022 09:50:24 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/05/2022 09:50:27 - INFO - codeparrot_training - Step 37750: {'lr': 0.00043140446343232895, 'samples': 19328512, 'steps': 37750, 'loss/train': 2.342555284500122} +03/05/2022 09:50:30 - INFO - codeparrot_training - Step 37751: {'lr': 0.000431400811831779, 'samples': 19329024, 'steps': 37751, 'loss/train': 1.2448031902313232} +03/05/2022 09:50:32 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 09:50:36 - INFO - codeparrot_training - Step 37752: {'lr': 0.0004313971601494927, 'samples': 19329536, 'steps': 37752, 'loss/train': 1.5074920654296875} +03/05/2022 09:50:39 - INFO - codeparrot_training - Step 37753: {'lr': 0.0004313935083854716, 'samples': 19330048, 'steps': 37753, 'loss/train': 2.204068183898926} +03/05/2022 09:50:41 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 09:50:44 - INFO - codeparrot_training - Step 37754: {'lr': 0.0004313898565397174, 'samples': 19330560, 'steps': 37754, 'loss/train': 0.6262397170066833} +03/05/2022 09:50:47 - INFO - codeparrot_training - Step 37755: {'lr': 0.00043138620461223175, 'samples': 19331072, 'steps': 37755, 'loss/train': 2.0114166736602783} +03/05/2022 09:50:50 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/05/2022 09:50:52 - INFO - codeparrot_training - Step 37756: {'lr': 0.00043138255260301625, 'samples': 19331584, 'steps': 37756, 'loss/train': 1.8662606477737427} +03/05/2022 09:50:56 - INFO - codeparrot_training - Step 37757: {'lr': 0.0004313789005120725, 'samples': 19332096, 'steps': 37757, 'loss/train': 2.6365272998809814} +03/05/2022 09:50:58 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/05/2022 09:51:01 - INFO - codeparrot_training - Step 37758: {'lr': 0.00043137524833940233, 'samples': 19332608, 'steps': 37758, 'loss/train': 1.6917839050292969} +03/05/2022 09:51:04 - INFO - codeparrot_training - Step 37759: {'lr': 0.0004313715960850072, 'samples': 19333120, 'steps': 37759, 'loss/train': 2.211031436920166} +03/05/2022 09:51:06 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 09:51:09 - INFO - codeparrot_training - Step 37760: {'lr': 0.00043136794374888887, 'samples': 19333632, 'steps': 37760, 'loss/train': 2.454784631729126} +03/05/2022 09:51:12 - INFO - codeparrot_training - Step 37761: {'lr': 0.0004313642913310489, 'samples': 19334144, 'steps': 37761, 'loss/train': 1.8556796312332153} +03/05/2022 09:51:14 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 09:51:18 - INFO - codeparrot_training - Step 37762: {'lr': 0.00043136063883148905, 'samples': 19334656, 'steps': 37762, 'loss/train': 1.6991041898727417} +03/05/2022 09:51:21 - INFO - codeparrot_training - Step 37763: {'lr': 0.00043135698625021093, 'samples': 19335168, 'steps': 37763, 'loss/train': 2.8453540802001953} +03/05/2022 09:51:23 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 09:51:26 - INFO - codeparrot_training - Step 37764: {'lr': 0.000431353333587216, 'samples': 19335680, 'steps': 37764, 'loss/train': 0.9973475337028503} +03/05/2022 09:51:29 - INFO - codeparrot_training - Step 37765: {'lr': 0.00043134968084250616, 'samples': 19336192, 'steps': 37765, 'loss/train': 1.4388701915740967} +03/05/2022 09:51:31 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 09:51:34 - INFO - codeparrot_training - Step 37766: {'lr': 0.00043134602801608293, 'samples': 19336704, 'steps': 37766, 'loss/train': 1.4616645574569702} +03/05/2022 09:51:38 - INFO - codeparrot_training - Step 37767: {'lr': 0.00043134237510794794, 'samples': 19337216, 'steps': 37767, 'loss/train': 1.5076266527175903} +03/05/2022 09:51:39 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/05/2022 09:51:43 - INFO - codeparrot_training - Step 37768: {'lr': 0.0004313387221181029, 'samples': 19337728, 'steps': 37768, 'loss/train': 0.6607352495193481} +03/05/2022 09:51:46 - INFO - codeparrot_training - Step 37769: {'lr': 0.0004313350690465495, 'samples': 19338240, 'steps': 37769, 'loss/train': 1.7684088945388794} +03/05/2022 09:51:48 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 09:51:51 - INFO - codeparrot_training - Step 37770: {'lr': 0.00043133141589328923, 'samples': 19338752, 'steps': 37770, 'loss/train': 2.2141733169555664} +03/05/2022 09:51:54 - INFO - codeparrot_training - Step 37771: {'lr': 0.0004313277626583239, 'samples': 19339264, 'steps': 37771, 'loss/train': 0.8748182654380798} +03/05/2022 09:51:56 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 09:52:00 - INFO - codeparrot_training - Step 37772: {'lr': 0.000431324109341655, 'samples': 19339776, 'steps': 37772, 'loss/train': 1.0755783319473267} +03/05/2022 09:52:03 - INFO - codeparrot_training - Step 37773: {'lr': 0.0004313204559432842, 'samples': 19340288, 'steps': 37773, 'loss/train': 2.25142240524292} +03/05/2022 09:52:04 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 09:52:08 - INFO - codeparrot_training - Step 37774: {'lr': 0.0004313168024632133, 'samples': 19340800, 'steps': 37774, 'loss/train': 1.0825450420379639} +03/05/2022 09:52:11 - INFO - codeparrot_training - Step 37775: {'lr': 0.00043131314890144386, 'samples': 19341312, 'steps': 37775, 'loss/train': 1.0785362720489502} +03/05/2022 09:52:13 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/05/2022 09:52:17 - INFO - codeparrot_training - Step 37776: {'lr': 0.0004313094952579775, 'samples': 19341824, 'steps': 37776, 'loss/train': 1.680455207824707} +03/05/2022 09:52:20 - INFO - codeparrot_training - Step 37777: {'lr': 0.0004313058415328158, 'samples': 19342336, 'steps': 37777, 'loss/train': 3.060363531112671} +03/05/2022 09:52:21 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/05/2022 09:52:25 - INFO - codeparrot_training - Step 37778: {'lr': 0.00043130218772596053, 'samples': 19342848, 'steps': 37778, 'loss/train': 1.8972012996673584} +03/05/2022 09:52:28 - INFO - codeparrot_training - Step 37779: {'lr': 0.00043129853383741334, 'samples': 19343360, 'steps': 37779, 'loss/train': 2.1065475940704346} +03/05/2022 09:52:29 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/05/2022 09:52:33 - INFO - codeparrot_training - Step 37780: {'lr': 0.00043129487986717574, 'samples': 19343872, 'steps': 37780, 'loss/train': 1.882871389389038} +03/05/2022 09:52:37 - INFO - codeparrot_training - Step 37781: {'lr': 0.00043129122581524957, 'samples': 19344384, 'steps': 37781, 'loss/train': 1.4573744535446167} +03/05/2022 09:52:38 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 09:52:42 - INFO - codeparrot_training - Step 37782: {'lr': 0.0004312875716816363, 'samples': 19344896, 'steps': 37782, 'loss/train': 1.7316608428955078} +03/05/2022 09:52:45 - INFO - codeparrot_training - Step 37783: {'lr': 0.0004312839174663377, 'samples': 19345408, 'steps': 37783, 'loss/train': 2.343803882598877} +03/05/2022 09:52:46 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/05/2022 09:52:50 - INFO - codeparrot_training - Step 37784: {'lr': 0.0004312802631693553, 'samples': 19345920, 'steps': 37784, 'loss/train': 1.6742995977401733} +03/05/2022 09:52:54 - INFO - codeparrot_training - Step 37785: {'lr': 0.00043127660879069084, 'samples': 19346432, 'steps': 37785, 'loss/train': 2.2473838329315186} +03/05/2022 09:52:55 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/05/2022 09:52:59 - INFO - codeparrot_training - Step 37786: {'lr': 0.00043127295433034594, 'samples': 19346944, 'steps': 37786, 'loss/train': 2.232743740081787} +03/05/2022 09:53:02 - INFO - codeparrot_training - Step 37787: {'lr': 0.00043126929978832217, 'samples': 19347456, 'steps': 37787, 'loss/train': 1.3695499897003174} +03/05/2022 09:53:03 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 09:53:07 - INFO - codeparrot_training - Step 37788: {'lr': 0.00043126564516462134, 'samples': 19347968, 'steps': 37788, 'loss/train': 2.0778491497039795} +03/05/2022 09:53:10 - INFO - codeparrot_training - Step 37789: {'lr': 0.000431261990459245, 'samples': 19348480, 'steps': 37789, 'loss/train': 1.8352696895599365} +03/05/2022 09:53:11 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/05/2022 09:53:16 - INFO - codeparrot_training - Step 37790: {'lr': 0.0004312583356721948, 'samples': 19348992, 'steps': 37790, 'loss/train': 1.71046781539917} +03/05/2022 09:53:19 - INFO - codeparrot_training - Step 37791: {'lr': 0.0004312546808034724, 'samples': 19349504, 'steps': 37791, 'loss/train': 1.7724194526672363} +03/05/2022 09:53:20 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/05/2022 09:53:24 - INFO - codeparrot_training - Step 37792: {'lr': 0.0004312510258530794, 'samples': 19350016, 'steps': 37792, 'loss/train': 2.159608840942383} +03/05/2022 09:53:27 - INFO - codeparrot_training - Step 37793: {'lr': 0.0004312473708210175, 'samples': 19350528, 'steps': 37793, 'loss/train': 1.398200273513794} +03/05/2022 09:53:29 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/05/2022 09:53:33 - INFO - codeparrot_training - Step 37794: {'lr': 0.0004312437157072884, 'samples': 19351040, 'steps': 37794, 'loss/train': 1.6174712181091309} +03/05/2022 09:53:36 - INFO - codeparrot_training - Step 37795: {'lr': 0.00043124006051189356, 'samples': 19351552, 'steps': 37795, 'loss/train': 1.7893608808517456} +03/05/2022 09:53:37 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 09:53:41 - INFO - codeparrot_training - Step 37796: {'lr': 0.0004312364052348348, 'samples': 19352064, 'steps': 37796, 'loss/train': 1.3551431894302368} +03/05/2022 09:53:44 - INFO - codeparrot_training - Step 37797: {'lr': 0.0004312327498761137, 'samples': 19352576, 'steps': 37797, 'loss/train': 1.1525990962982178} +03/05/2022 09:53:46 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/05/2022 09:53:50 - INFO - codeparrot_training - Step 37798: {'lr': 0.000431229094435732, 'samples': 19353088, 'steps': 37798, 'loss/train': 1.3771398067474365} +03/05/2022 09:53:53 - INFO - codeparrot_training - Step 37799: {'lr': 0.0004312254389136911, 'samples': 19353600, 'steps': 37799, 'loss/train': 1.6611214876174927} +03/05/2022 09:53:54 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 09:53:59 - INFO - codeparrot_training - Step 37800: {'lr': 0.00043122178330999296, 'samples': 19354112, 'steps': 37800, 'loss/train': 2.1442809104919434} +03/05/2022 09:54:02 - INFO - codeparrot_training - Step 37801: {'lr': 0.0004312181276246391, 'samples': 19354624, 'steps': 37801, 'loss/train': 2.297694206237793} +03/05/2022 09:54:05 - INFO - codeparrot_training - Step 37802: {'lr': 0.00043121447185763106, 'samples': 19355136, 'steps': 37802, 'loss/train': 1.4977821111679077} +03/05/2022 09:54:06 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 09:54:10 - INFO - codeparrot_training - Step 37803: {'lr': 0.0004312108160089706, 'samples': 19355648, 'steps': 37803, 'loss/train': 1.7507095336914062} +03/05/2022 09:54:14 - INFO - codeparrot_training - Step 37804: {'lr': 0.00043120716007865933, 'samples': 19356160, 'steps': 37804, 'loss/train': 2.108715295791626} +03/05/2022 09:54:14 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/05/2022 09:54:19 - INFO - codeparrot_training - Step 37805: {'lr': 0.0004312035040666989, 'samples': 19356672, 'steps': 37805, 'loss/train': 1.2766791582107544} +03/05/2022 09:54:22 - INFO - codeparrot_training - Step 37806: {'lr': 0.000431199847973091, 'samples': 19357184, 'steps': 37806, 'loss/train': 2.2332565784454346} +03/05/2022 09:54:23 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/05/2022 09:54:28 - INFO - codeparrot_training - Step 37807: {'lr': 0.0004311961917978372, 'samples': 19357696, 'steps': 37807, 'loss/train': 0.2978323698043823} +03/05/2022 09:54:31 - INFO - codeparrot_training - Step 37808: {'lr': 0.0004311925355409393, 'samples': 19358208, 'steps': 37808, 'loss/train': 1.5337976217269897} +03/05/2022 09:54:32 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/05/2022 09:54:36 - INFO - codeparrot_training - Step 37809: {'lr': 0.00043118887920239876, 'samples': 19358720, 'steps': 37809, 'loss/train': 1.8254008293151855} +03/05/2022 09:54:39 - INFO - codeparrot_training - Step 37810: {'lr': 0.00043118522278221726, 'samples': 19359232, 'steps': 37810, 'loss/train': 1.9307475090026855} +03/05/2022 09:54:41 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/05/2022 09:54:45 - INFO - codeparrot_training - Step 37811: {'lr': 0.0004311815662803966, 'samples': 19359744, 'steps': 37811, 'loss/train': 1.9755327701568604} +03/05/2022 09:54:48 - INFO - codeparrot_training - Step 37812: {'lr': 0.00043117790969693826, 'samples': 19360256, 'steps': 37812, 'loss/train': 2.0319652557373047} +03/05/2022 09:54:49 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/05/2022 09:54:53 - INFO - codeparrot_training - Step 37813: {'lr': 0.00043117425303184395, 'samples': 19360768, 'steps': 37813, 'loss/train': 1.4949917793273926} +03/05/2022 09:54:56 - INFO - codeparrot_training - Step 37814: {'lr': 0.0004311705962851153, 'samples': 19361280, 'steps': 37814, 'loss/train': 2.1020655632019043} +03/05/2022 09:54:57 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 09:55:01 - INFO - codeparrot_training - Step 37815: {'lr': 0.000431166939456754, 'samples': 19361792, 'steps': 37815, 'loss/train': 1.9416307210922241} +03/05/2022 09:55:05 - INFO - codeparrot_training - Step 37816: {'lr': 0.0004311632825467617, 'samples': 19362304, 'steps': 37816, 'loss/train': 2.0847034454345703} +03/05/2022 09:55:06 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 09:55:10 - INFO - codeparrot_training - Step 37817: {'lr': 0.00043115962555514, 'samples': 19362816, 'steps': 37817, 'loss/train': 0.7069443464279175} +03/05/2022 09:55:13 - INFO - codeparrot_training - Step 37818: {'lr': 0.0004311559684818905, 'samples': 19363328, 'steps': 37818, 'loss/train': 1.5219447612762451} +03/05/2022 09:55:14 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 09:55:18 - INFO - codeparrot_training - Step 37819: {'lr': 0.000431152311327015, 'samples': 19363840, 'steps': 37819, 'loss/train': 0.9714227318763733} +03/05/2022 09:55:21 - INFO - codeparrot_training - Step 37820: {'lr': 0.00043114865409051505, 'samples': 19364352, 'steps': 37820, 'loss/train': 1.3239413499832153} +03/05/2022 09:55:23 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 09:55:27 - INFO - codeparrot_training - Step 37821: {'lr': 0.0004311449967723923, 'samples': 19364864, 'steps': 37821, 'loss/train': 1.6976454257965088} +03/05/2022 09:55:30 - INFO - codeparrot_training - Step 37822: {'lr': 0.00043114133937264843, 'samples': 19365376, 'steps': 37822, 'loss/train': 2.0362679958343506} +03/05/2022 09:55:31 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/05/2022 09:55:35 - INFO - codeparrot_training - Step 37823: {'lr': 0.000431137681891285, 'samples': 19365888, 'steps': 37823, 'loss/train': 2.265512228012085} +03/05/2022 09:55:39 - INFO - codeparrot_training - Step 37824: {'lr': 0.0004311340243283038, 'samples': 19366400, 'steps': 37824, 'loss/train': 2.2506303787231445} +03/05/2022 09:55:41 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/05/2022 09:55:44 - INFO - codeparrot_training - Step 37825: {'lr': 0.0004311303666837064, 'samples': 19366912, 'steps': 37825, 'loss/train': 1.6736390590667725} +03/05/2022 09:55:47 - INFO - codeparrot_training - Step 37826: {'lr': 0.0004311267089574944, 'samples': 19367424, 'steps': 37826, 'loss/train': 2.86852765083313} +03/05/2022 09:55:49 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/05/2022 09:55:52 - INFO - codeparrot_training - Step 37827: {'lr': 0.00043112305114966957, 'samples': 19367936, 'steps': 37827, 'loss/train': 1.225523591041565} +03/05/2022 09:55:56 - INFO - codeparrot_training - Step 37828: {'lr': 0.0004311193932602334, 'samples': 19368448, 'steps': 37828, 'loss/train': 1.3334112167358398} +03/05/2022 09:55:57 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/05/2022 09:56:01 - INFO - codeparrot_training - Step 37829: {'lr': 0.0004311157352891877, 'samples': 19368960, 'steps': 37829, 'loss/train': 0.9887212514877319} +03/05/2022 09:56:04 - INFO - codeparrot_training - Step 37830: {'lr': 0.000431112077236534, 'samples': 19369472, 'steps': 37830, 'loss/train': 1.583834171295166} +03/05/2022 09:56:06 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/05/2022 09:56:09 - INFO - codeparrot_training - Step 37831: {'lr': 0.0004311084191022741, 'samples': 19369984, 'steps': 37831, 'loss/train': 0.7535305023193359} +03/05/2022 09:56:12 - INFO - codeparrot_training - Step 37832: {'lr': 0.00043110476088640935, 'samples': 19370496, 'steps': 37832, 'loss/train': 1.9746586084365845} +03/05/2022 09:56:14 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/05/2022 09:56:18 - INFO - codeparrot_training - Step 37833: {'lr': 0.00043110110258894177, 'samples': 19371008, 'steps': 37833, 'loss/train': 1.4196697473526} +03/05/2022 09:56:21 - INFO - codeparrot_training - Step 37834: {'lr': 0.00043109744420987274, 'samples': 19371520, 'steps': 37834, 'loss/train': 2.1329185962677} +03/05/2022 09:56:23 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 09:56:26 - INFO - codeparrot_training - Step 37835: {'lr': 0.000431093785749204, 'samples': 19372032, 'steps': 37835, 'loss/train': 1.0034433603286743} +03/05/2022 09:56:29 - INFO - codeparrot_training - Step 37836: {'lr': 0.00043109012720693717, 'samples': 19372544, 'steps': 37836, 'loss/train': 1.9124420881271362} +03/05/2022 09:56:31 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 09:56:35 - INFO - codeparrot_training - Step 37837: {'lr': 0.000431086468583074, 'samples': 19373056, 'steps': 37837, 'loss/train': 1.365955114364624} +03/05/2022 09:56:38 - INFO - codeparrot_training - Step 37838: {'lr': 0.00043108280987761593, 'samples': 19373568, 'steps': 37838, 'loss/train': 1.408068060874939} +03/05/2022 09:56:39 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 09:56:43 - INFO - codeparrot_training - Step 37839: {'lr': 0.0004310791510905649, 'samples': 19374080, 'steps': 37839, 'loss/train': 0.8463644981384277} +03/05/2022 09:56:46 - INFO - codeparrot_training - Step 37840: {'lr': 0.00043107549222192235, 'samples': 19374592, 'steps': 37840, 'loss/train': 0.7807109355926514} +03/05/2022 09:56:48 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/05/2022 09:56:52 - INFO - codeparrot_training - Step 37841: {'lr': 0.0004310718332716899, 'samples': 19375104, 'steps': 37841, 'loss/train': 2.167212724685669} +03/05/2022 09:56:55 - INFO - codeparrot_training - Step 37842: {'lr': 0.00043106817423986933, 'samples': 19375616, 'steps': 37842, 'loss/train': 2.1488988399505615} +03/05/2022 09:56:59 - INFO - codeparrot_training - Step 37843: {'lr': 0.00043106451512646226, 'samples': 19376128, 'steps': 37843, 'loss/train': 2.0155699253082275} +03/05/2022 09:56:59 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/05/2022 09:57:04 - INFO - codeparrot_training - Step 37844: {'lr': 0.00043106085593147027, 'samples': 19376640, 'steps': 37844, 'loss/train': 1.9916726350784302} +03/05/2022 09:57:07 - INFO - codeparrot_training - Step 37845: {'lr': 0.00043105719665489505, 'samples': 19377152, 'steps': 37845, 'loss/train': 1.4611812829971313} +03/05/2022 09:57:08 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/05/2022 09:57:13 - INFO - codeparrot_training - Step 37846: {'lr': 0.0004310535372967383, 'samples': 19377664, 'steps': 37846, 'loss/train': 1.6552391052246094} +03/05/2022 09:57:16 - INFO - codeparrot_training - Step 37847: {'lr': 0.0004310498778570016, 'samples': 19378176, 'steps': 37847, 'loss/train': 1.7216873168945312} +03/05/2022 09:57:16 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 09:57:21 - INFO - codeparrot_training - Step 37848: {'lr': 0.0004310462183356866, 'samples': 19378688, 'steps': 37848, 'loss/train': 1.8481634855270386} +03/05/2022 09:57:24 - INFO - codeparrot_training - Step 37849: {'lr': 0.00043104255873279497, 'samples': 19379200, 'steps': 37849, 'loss/train': 0.41859790682792664} +03/05/2022 09:57:25 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 09:57:30 - INFO - codeparrot_training - Step 37850: {'lr': 0.00043103889904832837, 'samples': 19379712, 'steps': 37850, 'loss/train': 1.8630810976028442} +03/05/2022 09:57:33 - INFO - codeparrot_training - Step 37851: {'lr': 0.0004310352392822884, 'samples': 19380224, 'steps': 37851, 'loss/train': 1.5084508657455444} +03/05/2022 09:57:33 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 09:57:38 - INFO - codeparrot_training - Step 37852: {'lr': 0.00043103157943467674, 'samples': 19380736, 'steps': 37852, 'loss/train': 1.2454826831817627} +03/05/2022 09:57:41 - INFO - codeparrot_training - Step 37853: {'lr': 0.00043102791950549513, 'samples': 19381248, 'steps': 37853, 'loss/train': 2.375488519668579} +03/05/2022 09:57:42 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 09:57:46 - INFO - codeparrot_training - Step 37854: {'lr': 0.00043102425949474504, 'samples': 19381760, 'steps': 37854, 'loss/train': 1.4127804040908813} +03/05/2022 09:57:50 - INFO - codeparrot_training - Step 37855: {'lr': 0.00043102059940242825, 'samples': 19382272, 'steps': 37855, 'loss/train': 1.1573930978775024} +03/05/2022 09:57:50 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 09:57:55 - INFO - codeparrot_training - Step 37856: {'lr': 0.0004310169392285464, 'samples': 19382784, 'steps': 37856, 'loss/train': 1.4555232524871826} +03/05/2022 09:57:58 - INFO - codeparrot_training - Step 37857: {'lr': 0.0004310132789731011, 'samples': 19383296, 'steps': 37857, 'loss/train': 2.1113319396972656} +03/05/2022 09:57:58 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/05/2022 09:58:03 - INFO - codeparrot_training - Step 37858: {'lr': 0.000431009618636094, 'samples': 19383808, 'steps': 37858, 'loss/train': 0.9203762412071228} +03/05/2022 09:58:06 - INFO - codeparrot_training - Step 37859: {'lr': 0.00043100595821752674, 'samples': 19384320, 'steps': 37859, 'loss/train': 0.479248970746994} +03/05/2022 09:58:06 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/05/2022 09:58:12 - INFO - codeparrot_training - Step 37860: {'lr': 0.00043100229771740096, 'samples': 19384832, 'steps': 37860, 'loss/train': 2.3405745029449463} +03/05/2022 09:58:15 - INFO - codeparrot_training - Step 37861: {'lr': 0.0004309986371357184, 'samples': 19385344, 'steps': 37861, 'loss/train': 1.7067760229110718} +03/05/2022 09:58:15 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 09:58:20 - INFO - codeparrot_training - Step 37862: {'lr': 0.00043099497647248065, 'samples': 19385856, 'steps': 37862, 'loss/train': 1.9136826992034912} +03/05/2022 09:58:23 - INFO - codeparrot_training - Step 37863: {'lr': 0.00043099131572768936, 'samples': 19386368, 'steps': 37863, 'loss/train': 2.0518929958343506} +03/05/2022 09:58:23 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/05/2022 09:58:29 - INFO - codeparrot_training - Step 37864: {'lr': 0.00043098765490134607, 'samples': 19386880, 'steps': 37864, 'loss/train': 1.5379780530929565} +03/05/2022 09:58:32 - INFO - codeparrot_training - Step 37865: {'lr': 0.00043098399399345267, 'samples': 19387392, 'steps': 37865, 'loss/train': 2.418001651763916} +03/05/2022 09:58:32 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/05/2022 09:58:37 - INFO - codeparrot_training - Step 37866: {'lr': 0.0004309803330040106, 'samples': 19387904, 'steps': 37866, 'loss/train': 1.712828516960144} +03/05/2022 09:58:40 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 09:58:42 - INFO - codeparrot_training - Step 37867: {'lr': 0.0004309766719330216, 'samples': 19388416, 'steps': 37867, 'loss/train': 1.4560084342956543} +03/05/2022 09:58:45 - INFO - codeparrot_training - Step 37868: {'lr': 0.00043097301078048736, 'samples': 19388928, 'steps': 37868, 'loss/train': 2.5097904205322266} +03/05/2022 09:58:48 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 09:58:51 - INFO - codeparrot_training - Step 37869: {'lr': 0.00043096934954640935, 'samples': 19389440, 'steps': 37869, 'loss/train': 1.4404182434082031} +03/05/2022 09:58:55 - INFO - codeparrot_training - Step 37870: {'lr': 0.0004309656882307894, 'samples': 19389952, 'steps': 37870, 'loss/train': 2.1487138271331787} +03/05/2022 09:58:58 - INFO - codeparrot_training - Step 37871: {'lr': 0.0004309620268336292, 'samples': 19390464, 'steps': 37871, 'loss/train': 2.233774423599243} +03/05/2022 09:59:00 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/05/2022 09:59:03 - INFO - codeparrot_training - Step 37872: {'lr': 0.0004309583653549302, 'samples': 19390976, 'steps': 37872, 'loss/train': 1.2517735958099365} +03/05/2022 09:59:06 - INFO - codeparrot_training - Step 37873: {'lr': 0.0004309547037946941, 'samples': 19391488, 'steps': 37873, 'loss/train': 2.06913161277771} +03/05/2022 09:59:09 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 09:59:12 - INFO - codeparrot_training - Step 37874: {'lr': 0.0004309510421529227, 'samples': 19392000, 'steps': 37874, 'loss/train': 0.6754811406135559} +03/05/2022 09:59:15 - INFO - codeparrot_training - Step 37875: {'lr': 0.00043094738042961754, 'samples': 19392512, 'steps': 37875, 'loss/train': 1.6046569347381592} +03/05/2022 09:59:18 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/05/2022 09:59:20 - INFO - codeparrot_training - Step 37876: {'lr': 0.0004309437186247803, 'samples': 19393024, 'steps': 37876, 'loss/train': 0.9485911726951599} +03/05/2022 09:59:23 - INFO - codeparrot_training - Step 37877: {'lr': 0.00043094005673841257, 'samples': 19393536, 'steps': 37877, 'loss/train': 1.1992535591125488} +03/05/2022 09:59:26 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/05/2022 09:59:28 - INFO - codeparrot_training - Step 37878: {'lr': 0.00043093639477051606, 'samples': 19394048, 'steps': 37878, 'loss/train': 1.6024229526519775} +03/05/2022 09:59:32 - INFO - codeparrot_training - Step 37879: {'lr': 0.0004309327327210923, 'samples': 19394560, 'steps': 37879, 'loss/train': 0.654205858707428} +03/05/2022 09:59:35 - INFO - codeparrot_training - Step 37880: {'lr': 0.00043092907059014325, 'samples': 19395072, 'steps': 37880, 'loss/train': 0.5172398686408997} +03/05/2022 09:59:35 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 09:59:40 - INFO - codeparrot_training - Step 37881: {'lr': 0.00043092540837767025, 'samples': 19395584, 'steps': 37881, 'loss/train': 1.5750616788864136} +03/05/2022 09:59:43 - INFO - codeparrot_training - Step 37882: {'lr': 0.000430921746083675, 'samples': 19396096, 'steps': 37882, 'loss/train': 1.0613892078399658} +03/05/2022 09:59:43 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/05/2022 09:59:49 - INFO - codeparrot_training - Step 37883: {'lr': 0.00043091808370815935, 'samples': 19396608, 'steps': 37883, 'loss/train': 1.5158919095993042} +03/05/2022 09:59:51 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/05/2022 09:59:54 - INFO - codeparrot_training - Step 37884: {'lr': 0.0004309144212511246, 'samples': 19397120, 'steps': 37884, 'loss/train': 1.949298620223999} +03/05/2022 09:59:57 - INFO - codeparrot_training - Step 37885: {'lr': 0.00043091075871257275, 'samples': 19397632, 'steps': 37885, 'loss/train': 0.6992967128753662} +03/05/2022 10:00:00 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/05/2022 10:00:02 - INFO - codeparrot_training - Step 37886: {'lr': 0.0004309070960925052, 'samples': 19398144, 'steps': 37886, 'loss/train': 1.865713119506836} +03/05/2022 10:00:06 - INFO - codeparrot_training - Step 37887: {'lr': 0.0004309034333909238, 'samples': 19398656, 'steps': 37887, 'loss/train': 1.5458478927612305} +03/05/2022 10:00:08 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/05/2022 10:00:11 - INFO - codeparrot_training - Step 37888: {'lr': 0.0004308997706078301, 'samples': 19399168, 'steps': 37888, 'loss/train': 1.2711635828018188} +03/05/2022 10:00:14 - INFO - codeparrot_training - Step 37889: {'lr': 0.00043089610774322575, 'samples': 19399680, 'steps': 37889, 'loss/train': 1.9903497695922852} +03/05/2022 10:00:16 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 10:00:19 - INFO - codeparrot_training - Step 37890: {'lr': 0.00043089244479711233, 'samples': 19400192, 'steps': 37890, 'loss/train': 1.5419869422912598} +03/05/2022 10:00:22 - INFO - codeparrot_training - Step 37891: {'lr': 0.00043088878176949163, 'samples': 19400704, 'steps': 37891, 'loss/train': 1.056033730506897} +03/05/2022 10:00:25 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/05/2022 10:00:28 - INFO - codeparrot_training - Step 37892: {'lr': 0.0004308851186603652, 'samples': 19401216, 'steps': 37892, 'loss/train': 1.2840379476547241} +03/05/2022 10:00:31 - INFO - codeparrot_training - Step 37893: {'lr': 0.0004308814554697348, 'samples': 19401728, 'steps': 37893, 'loss/train': 1.9513486623764038} +03/05/2022 10:00:33 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/05/2022 10:00:36 - INFO - codeparrot_training - Step 37894: {'lr': 0.0004308777921976019, 'samples': 19402240, 'steps': 37894, 'loss/train': 1.9178094863891602} +03/05/2022 10:00:39 - INFO - codeparrot_training - Step 37895: {'lr': 0.00043087412884396835, 'samples': 19402752, 'steps': 37895, 'loss/train': 2.4974148273468018} +03/05/2022 10:00:42 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/05/2022 10:00:45 - INFO - codeparrot_training - Step 37896: {'lr': 0.0004308704654088357, 'samples': 19403264, 'steps': 37896, 'loss/train': 1.3881484270095825} +03/05/2022 10:00:48 - INFO - codeparrot_training - Step 37897: {'lr': 0.00043086680189220554, 'samples': 19403776, 'steps': 37897, 'loss/train': 1.8198308944702148} +03/05/2022 10:00:50 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 10:00:53 - INFO - codeparrot_training - Step 37898: {'lr': 0.00043086313829407966, 'samples': 19404288, 'steps': 37898, 'loss/train': 1.6075712442398071} +03/05/2022 10:00:56 - INFO - codeparrot_training - Step 37899: {'lr': 0.0004308594746144596, 'samples': 19404800, 'steps': 37899, 'loss/train': 1.4757670164108276} +03/05/2022 10:00:59 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 10:01:02 - INFO - codeparrot_training - Step 37900: {'lr': 0.0004308558108533471, 'samples': 19405312, 'steps': 37900, 'loss/train': 2.1862990856170654} +03/05/2022 10:01:05 - INFO - codeparrot_training - Step 37901: {'lr': 0.0004308521470107437, 'samples': 19405824, 'steps': 37901, 'loss/train': 1.7562228441238403} +03/05/2022 10:01:07 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 10:01:10 - INFO - codeparrot_training - Step 37902: {'lr': 0.00043084848308665115, 'samples': 19406336, 'steps': 37902, 'loss/train': 0.6028088331222534} +03/05/2022 10:01:13 - INFO - codeparrot_training - Step 37903: {'lr': 0.00043084481908107103, 'samples': 19406848, 'steps': 37903, 'loss/train': 1.7737396955490112} +03/05/2022 10:01:16 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 10:01:18 - INFO - codeparrot_training - Step 37904: {'lr': 0.00043084115499400505, 'samples': 19407360, 'steps': 37904, 'loss/train': 1.7592620849609375} +03/05/2022 10:01:22 - INFO - codeparrot_training - Step 37905: {'lr': 0.0004308374908254549, 'samples': 19407872, 'steps': 37905, 'loss/train': 1.9128062725067139} +03/05/2022 10:01:24 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/05/2022 10:01:27 - INFO - codeparrot_training - Step 37906: {'lr': 0.000430833826575422, 'samples': 19408384, 'steps': 37906, 'loss/train': 1.6850887537002563} +03/05/2022 10:01:30 - INFO - codeparrot_training - Step 37907: {'lr': 0.0004308301622439083, 'samples': 19408896, 'steps': 37907, 'loss/train': 1.5409479141235352} +03/05/2022 10:01:34 - INFO - codeparrot_training - Step 37908: {'lr': 0.0004308264978309153, 'samples': 19409408, 'steps': 37908, 'loss/train': 2.578800678253174} +03/05/2022 10:01:34 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 10:01:39 - INFO - codeparrot_training - Step 37909: {'lr': 0.0004308228333364447, 'samples': 19409920, 'steps': 37909, 'loss/train': 0.9439515471458435} +03/05/2022 10:01:42 - INFO - codeparrot_training - Step 37910: {'lr': 0.000430819168760498, 'samples': 19410432, 'steps': 37910, 'loss/train': 2.389791250228882} +03/05/2022 10:01:42 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 10:01:47 - INFO - codeparrot_training - Step 37911: {'lr': 0.0004308155041030771, 'samples': 19410944, 'steps': 37911, 'loss/train': 1.799576997756958} +03/05/2022 10:01:50 - INFO - codeparrot_training - Step 37912: {'lr': 0.00043081183936418343, 'samples': 19411456, 'steps': 37912, 'loss/train': 1.7143737077713013} +03/05/2022 10:01:51 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/05/2022 10:01:56 - INFO - codeparrot_training - Step 37913: {'lr': 0.0004308081745438188, 'samples': 19411968, 'steps': 37913, 'loss/train': 1.6213486194610596} +03/05/2022 10:01:59 - INFO - codeparrot_training - Step 37914: {'lr': 0.00043080450964198483, 'samples': 19412480, 'steps': 37914, 'loss/train': 1.3872851133346558} +03/05/2022 10:02:00 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 10:02:04 - INFO - codeparrot_training - Step 37915: {'lr': 0.00043080084465868307, 'samples': 19412992, 'steps': 37915, 'loss/train': 2.8674635887145996} +03/05/2022 10:02:07 - INFO - codeparrot_training - Step 37916: {'lr': 0.0004307971795939152, 'samples': 19413504, 'steps': 37916, 'loss/train': 1.8053076267242432} +03/05/2022 10:02:08 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 10:02:13 - INFO - codeparrot_training - Step 37917: {'lr': 0.000430793514447683, 'samples': 19414016, 'steps': 37917, 'loss/train': 1.7051365375518799} +03/05/2022 10:02:16 - INFO - codeparrot_training - Step 37918: {'lr': 0.000430789849219988, 'samples': 19414528, 'steps': 37918, 'loss/train': 2.388674259185791} +03/05/2022 10:02:17 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 10:02:21 - INFO - codeparrot_training - Step 37919: {'lr': 0.0004307861839108319, 'samples': 19415040, 'steps': 37919, 'loss/train': 2.0870227813720703} +03/05/2022 10:02:24 - INFO - codeparrot_training - Step 37920: {'lr': 0.00043078251852021634, 'samples': 19415552, 'steps': 37920, 'loss/train': 1.7244954109191895} +03/05/2022 10:02:26 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 10:02:29 - INFO - codeparrot_training - Step 37921: {'lr': 0.0004307788530481429, 'samples': 19416064, 'steps': 37921, 'loss/train': 1.7325867414474487} +03/05/2022 10:02:33 - INFO - codeparrot_training - Step 37922: {'lr': 0.00043077518749461336, 'samples': 19416576, 'steps': 37922, 'loss/train': 2.560762643814087} +03/05/2022 10:02:34 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/05/2022 10:02:38 - INFO - codeparrot_training - Step 37923: {'lr': 0.00043077152185962933, 'samples': 19417088, 'steps': 37923, 'loss/train': 0.9312105178833008} +03/05/2022 10:02:41 - INFO - codeparrot_training - Step 37924: {'lr': 0.00043076785614319234, 'samples': 19417600, 'steps': 37924, 'loss/train': 1.1721289157867432} +03/05/2022 10:02:42 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/05/2022 10:02:46 - INFO - codeparrot_training - Step 37925: {'lr': 0.0004307641903453042, 'samples': 19418112, 'steps': 37925, 'loss/train': 1.2373802661895752} +03/05/2022 10:02:49 - INFO - codeparrot_training - Step 37926: {'lr': 0.00043076052446596656, 'samples': 19418624, 'steps': 37926, 'loss/train': 1.4676055908203125} +03/05/2022 10:02:51 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/05/2022 10:02:55 - INFO - codeparrot_training - Step 37927: {'lr': 0.000430756858505181, 'samples': 19419136, 'steps': 37927, 'loss/train': 1.3450775146484375} +03/05/2022 10:02:58 - INFO - codeparrot_training - Step 37928: {'lr': 0.00043075319246294914, 'samples': 19419648, 'steps': 37928, 'loss/train': 1.497881293296814} +03/05/2022 10:03:00 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/05/2022 10:03:03 - INFO - codeparrot_training - Step 37929: {'lr': 0.0004307495263392727, 'samples': 19420160, 'steps': 37929, 'loss/train': 1.7660783529281616} +03/05/2022 10:03:06 - INFO - codeparrot_training - Step 37930: {'lr': 0.00043074586013415337, 'samples': 19420672, 'steps': 37930, 'loss/train': 1.8334081172943115} +03/05/2022 10:03:08 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/05/2022 10:03:12 - INFO - codeparrot_training - Step 37931: {'lr': 0.0004307421938475926, 'samples': 19421184, 'steps': 37931, 'loss/train': 1.2816463708877563} +03/05/2022 10:03:15 - INFO - codeparrot_training - Step 37932: {'lr': 0.0004307385274795923, 'samples': 19421696, 'steps': 37932, 'loss/train': 1.2902185916900635} +03/05/2022 10:03:17 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/05/2022 10:03:20 - INFO - codeparrot_training - Step 37933: {'lr': 0.000430734861030154, 'samples': 19422208, 'steps': 37933, 'loss/train': 1.2918124198913574} +03/05/2022 10:03:23 - INFO - codeparrot_training - Step 37934: {'lr': 0.0004307311944992793, 'samples': 19422720, 'steps': 37934, 'loss/train': 1.2767144441604614} +03/05/2022 10:03:26 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 10:03:29 - INFO - codeparrot_training - Step 37935: {'lr': 0.00043072752788697003, 'samples': 19423232, 'steps': 37935, 'loss/train': 1.543299674987793} +03/05/2022 10:03:32 - INFO - codeparrot_training - Step 37936: {'lr': 0.0004307238611932276, 'samples': 19423744, 'steps': 37936, 'loss/train': 2.0173611640930176} +03/05/2022 10:03:35 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/05/2022 10:03:37 - INFO - codeparrot_training - Step 37937: {'lr': 0.00043072019441805386, 'samples': 19424256, 'steps': 37937, 'loss/train': 1.5570237636566162} +03/05/2022 10:03:40 - INFO - codeparrot_training - Step 37938: {'lr': 0.00043071652756145035, 'samples': 19424768, 'steps': 37938, 'loss/train': 2.2263429164886475} +03/05/2022 10:03:43 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/05/2022 10:03:45 - INFO - codeparrot_training - Step 37939: {'lr': 0.0004307128606234188, 'samples': 19425280, 'steps': 37939, 'loss/train': 1.314713478088379} +03/05/2022 10:03:49 - INFO - codeparrot_training - Step 37940: {'lr': 0.00043070919360396076, 'samples': 19425792, 'steps': 37940, 'loss/train': 1.3388396501541138} +03/05/2022 10:03:51 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/05/2022 10:03:54 - INFO - codeparrot_training - Step 37941: {'lr': 0.00043070552650307804, 'samples': 19426304, 'steps': 37941, 'loss/train': 1.635088562965393} +03/05/2022 10:03:57 - INFO - codeparrot_training - Step 37942: {'lr': 0.0004307018593207721, 'samples': 19426816, 'steps': 37942, 'loss/train': 1.5173020362854004} +03/05/2022 10:04:00 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/05/2022 10:04:02 - INFO - codeparrot_training - Step 37943: {'lr': 0.0004306981920570447, 'samples': 19427328, 'steps': 37943, 'loss/train': 1.4404644966125488} +03/05/2022 10:04:06 - INFO - codeparrot_training - Step 37944: {'lr': 0.00043069452471189765, 'samples': 19427840, 'steps': 37944, 'loss/train': 1.4728060960769653} +03/05/2022 10:04:08 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 10:04:11 - INFO - codeparrot_training - Step 37945: {'lr': 0.00043069085728533225, 'samples': 19428352, 'steps': 37945, 'loss/train': 1.2332886457443237} +03/05/2022 10:04:14 - INFO - codeparrot_training - Step 37946: {'lr': 0.0004306871897773504, 'samples': 19428864, 'steps': 37946, 'loss/train': 0.8373779654502869} +03/05/2022 10:04:16 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 10:04:19 - INFO - codeparrot_training - Step 37947: {'lr': 0.0004306835221879537, 'samples': 19429376, 'steps': 37947, 'loss/train': 1.0834929943084717} +03/05/2022 10:04:23 - INFO - codeparrot_training - Step 37948: {'lr': 0.00043067985451714373, 'samples': 19429888, 'steps': 37948, 'loss/train': 1.9333170652389526} +03/05/2022 10:04:25 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/05/2022 10:04:28 - INFO - codeparrot_training - Step 37949: {'lr': 0.0004306761867649223, 'samples': 19430400, 'steps': 37949, 'loss/train': 0.6583194136619568} +03/05/2022 10:04:31 - INFO - codeparrot_training - Step 37950: {'lr': 0.0004306725189312909, 'samples': 19430912, 'steps': 37950, 'loss/train': 2.0297975540161133} +03/05/2022 10:04:33 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/05/2022 10:04:36 - INFO - codeparrot_training - Step 37951: {'lr': 0.00043066885101625133, 'samples': 19431424, 'steps': 37951, 'loss/train': 1.6340519189834595} +03/05/2022 10:04:39 - INFO - codeparrot_training - Step 37952: {'lr': 0.00043066518301980504, 'samples': 19431936, 'steps': 37952, 'loss/train': 1.442530632019043} +03/05/2022 10:04:42 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 10:04:45 - INFO - codeparrot_training - Step 37953: {'lr': 0.00043066151494195387, 'samples': 19432448, 'steps': 37953, 'loss/train': 1.726385235786438} +03/05/2022 10:04:48 - INFO - codeparrot_training - Step 37954: {'lr': 0.00043065784678269944, 'samples': 19432960, 'steps': 37954, 'loss/train': 1.6143455505371094} +03/05/2022 10:04:50 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 10:04:53 - INFO - codeparrot_training - Step 37955: {'lr': 0.00043065417854204333, 'samples': 19433472, 'steps': 37955, 'loss/train': 1.7574493885040283} +03/05/2022 10:04:56 - INFO - codeparrot_training - Step 37956: {'lr': 0.0004306505102199872, 'samples': 19433984, 'steps': 37956, 'loss/train': 0.9709206819534302} +03/05/2022 10:04:59 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 10:05:02 - INFO - codeparrot_training - Step 37957: {'lr': 0.0004306468418165328, 'samples': 19434496, 'steps': 37957, 'loss/train': 2.6438684463500977} +03/05/2022 10:05:05 - INFO - codeparrot_training - Step 37958: {'lr': 0.0004306431733316817, 'samples': 19435008, 'steps': 37958, 'loss/train': 0.70161372423172} +03/05/2022 10:05:07 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/05/2022 10:05:10 - INFO - codeparrot_training - Step 37959: {'lr': 0.00043063950476543563, 'samples': 19435520, 'steps': 37959, 'loss/train': 1.897818922996521} +03/05/2022 10:05:13 - INFO - codeparrot_training - Step 37960: {'lr': 0.0004306358361177961, 'samples': 19436032, 'steps': 37960, 'loss/train': 1.259172797203064} +03/05/2022 10:05:15 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/05/2022 10:05:18 - INFO - codeparrot_training - Step 37961: {'lr': 0.00043063216738876487, 'samples': 19436544, 'steps': 37961, 'loss/train': 2.1958703994750977} +03/05/2022 10:05:22 - INFO - codeparrot_training - Step 37962: {'lr': 0.0004306284985783436, 'samples': 19437056, 'steps': 37962, 'loss/train': 1.7720119953155518} +03/05/2022 10:05:24 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 10:05:27 - INFO - codeparrot_training - Step 37963: {'lr': 0.00043062482968653394, 'samples': 19437568, 'steps': 37963, 'loss/train': 1.9559146165847778} +03/05/2022 10:05:30 - INFO - codeparrot_training - Step 37964: {'lr': 0.00043062116071333745, 'samples': 19438080, 'steps': 37964, 'loss/train': 1.357301950454712} +03/05/2022 10:05:32 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/05/2022 10:05:35 - INFO - codeparrot_training - Step 37965: {'lr': 0.0004306174916587559, 'samples': 19438592, 'steps': 37965, 'loss/train': 1.9595907926559448} +03/05/2022 10:05:39 - INFO - codeparrot_training - Step 37966: {'lr': 0.0004306138225227909, 'samples': 19439104, 'steps': 37966, 'loss/train': 2.7268028259277344} +03/05/2022 10:05:41 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/05/2022 10:05:44 - INFO - codeparrot_training - Step 37967: {'lr': 0.0004306101533054441, 'samples': 19439616, 'steps': 37967, 'loss/train': 1.7862026691436768} +03/05/2022 10:05:47 - INFO - codeparrot_training - Step 37968: {'lr': 0.0004306064840067171, 'samples': 19440128, 'steps': 37968, 'loss/train': 1.9523568153381348} +03/05/2022 10:05:49 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 10:05:52 - INFO - codeparrot_training - Step 37969: {'lr': 0.00043060281462661165, 'samples': 19440640, 'steps': 37969, 'loss/train': 0.8942475914955139} +03/05/2022 10:05:56 - INFO - codeparrot_training - Step 37970: {'lr': 0.0004305991451651293, 'samples': 19441152, 'steps': 37970, 'loss/train': 1.7197656631469727} +03/05/2022 10:05:57 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/05/2022 10:06:01 - INFO - codeparrot_training - Step 37971: {'lr': 0.00043059547562227185, 'samples': 19441664, 'steps': 37971, 'loss/train': 1.4768589735031128} +03/05/2022 10:06:04 - INFO - codeparrot_training - Step 37972: {'lr': 0.0004305918059980408, 'samples': 19442176, 'steps': 37972, 'loss/train': 1.7680021524429321} +03/05/2022 10:06:06 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/05/2022 10:06:09 - INFO - codeparrot_training - Step 37973: {'lr': 0.00043058813629243787, 'samples': 19442688, 'steps': 37973, 'loss/train': 1.9044512510299683} +03/05/2022 10:06:12 - INFO - codeparrot_training - Step 37974: {'lr': 0.0004305844665054648, 'samples': 19443200, 'steps': 37974, 'loss/train': 1.1323875188827515} +03/05/2022 10:06:14 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/05/2022 10:06:18 - INFO - codeparrot_training - Step 37975: {'lr': 0.00043058079663712304, 'samples': 19443712, 'steps': 37975, 'loss/train': 2.3119680881500244} +03/05/2022 10:06:21 - INFO - codeparrot_training - Step 37976: {'lr': 0.00043057712668741443, 'samples': 19444224, 'steps': 37976, 'loss/train': 0.06984040141105652} +03/05/2022 10:06:23 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 10:06:26 - INFO - codeparrot_training - Step 37977: {'lr': 0.0004305734566563405, 'samples': 19444736, 'steps': 37977, 'loss/train': 0.8590648174285889} +03/05/2022 10:06:30 - INFO - codeparrot_training - Step 37978: {'lr': 0.000430569786543903, 'samples': 19445248, 'steps': 37978, 'loss/train': 0.6823554039001465} +03/05/2022 10:06:32 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 10:06:35 - INFO - codeparrot_training - Step 37979: {'lr': 0.00043056611635010355, 'samples': 19445760, 'steps': 37979, 'loss/train': 1.363161325454712} +03/05/2022 10:06:38 - INFO - codeparrot_training - Step 37980: {'lr': 0.00043056244607494375, 'samples': 19446272, 'steps': 37980, 'loss/train': 1.6660517454147339} +03/05/2022 10:06:40 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/05/2022 10:06:43 - INFO - codeparrot_training - Step 37981: {'lr': 0.0004305587757184254, 'samples': 19446784, 'steps': 37981, 'loss/train': 0.1975853443145752} +03/05/2022 10:06:46 - INFO - codeparrot_training - Step 37982: {'lr': 0.0004305551052805499, 'samples': 19447296, 'steps': 37982, 'loss/train': 1.9760558605194092} +03/05/2022 10:06:48 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/05/2022 10:06:52 - INFO - codeparrot_training - Step 37983: {'lr': 0.0004305514347613191, 'samples': 19447808, 'steps': 37983, 'loss/train': 1.937135100364685} +03/05/2022 10:06:55 - INFO - codeparrot_training - Step 37984: {'lr': 0.0004305477641607347, 'samples': 19448320, 'steps': 37984, 'loss/train': 1.763716220855713} +03/05/2022 10:06:57 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 10:07:00 - INFO - codeparrot_training - Step 37985: {'lr': 0.0004305440934787982, 'samples': 19448832, 'steps': 37985, 'loss/train': 0.0665673241019249} +03/05/2022 10:07:03 - INFO - codeparrot_training - Step 37986: {'lr': 0.0004305404227155113, 'samples': 19449344, 'steps': 37986, 'loss/train': 1.5655131340026855} +03/05/2022 10:07:05 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/05/2022 10:07:09 - INFO - codeparrot_training - Step 37987: {'lr': 0.0004305367518708757, 'samples': 19449856, 'steps': 37987, 'loss/train': 2.1082358360290527} +03/05/2022 10:07:12 - INFO - codeparrot_training - Step 37988: {'lr': 0.000430533080944893, 'samples': 19450368, 'steps': 37988, 'loss/train': 1.0807193517684937} +03/05/2022 10:07:14 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 10:07:17 - INFO - codeparrot_training - Step 37989: {'lr': 0.00043052940993756493, 'samples': 19450880, 'steps': 37989, 'loss/train': 1.4271621704101562} +03/05/2022 10:07:20 - INFO - codeparrot_training - Step 37990: {'lr': 0.00043052573884889305, 'samples': 19451392, 'steps': 37990, 'loss/train': 1.86587655544281} +03/05/2022 10:07:23 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/05/2022 10:07:26 - INFO - codeparrot_training - Step 37991: {'lr': 0.00043052206767887907, 'samples': 19451904, 'steps': 37991, 'loss/train': 0.9810808300971985} +03/05/2022 10:07:29 - INFO - codeparrot_training - Step 37992: {'lr': 0.00043051839642752466, 'samples': 19452416, 'steps': 37992, 'loss/train': 1.541760802268982} +03/05/2022 10:07:31 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 10:07:34 - INFO - codeparrot_training - Step 37993: {'lr': 0.00043051472509483135, 'samples': 19452928, 'steps': 37993, 'loss/train': 1.4545832872390747} +03/05/2022 10:07:37 - INFO - codeparrot_training - Step 37994: {'lr': 0.00043051105368080103, 'samples': 19453440, 'steps': 37994, 'loss/train': 0.8379783630371094} +03/05/2022 10:07:39 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/05/2022 10:07:42 - INFO - codeparrot_training - Step 37995: {'lr': 0.00043050738218543505, 'samples': 19453952, 'steps': 37995, 'loss/train': 1.8295984268188477} +03/05/2022 10:07:45 - INFO - codeparrot_training - Step 37996: {'lr': 0.00043050371060873537, 'samples': 19454464, 'steps': 37996, 'loss/train': 1.9095585346221924} +03/05/2022 10:07:47 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 10:07:51 - INFO - codeparrot_training - Step 37997: {'lr': 0.00043050003895070345, 'samples': 19454976, 'steps': 37997, 'loss/train': 1.7113313674926758} +03/05/2022 10:07:54 - INFO - codeparrot_training - Step 37998: {'lr': 0.000430496367211341, 'samples': 19455488, 'steps': 37998, 'loss/train': 1.446597933769226} +03/05/2022 10:07:55 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 10:07:59 - INFO - codeparrot_training - Step 37999: {'lr': 0.00043049269539064967, 'samples': 19456000, 'steps': 37999, 'loss/train': 1.3205163478851318} +03/05/2022 10:08:02 - INFO - codeparrot_training - Step 38000: {'lr': 0.0004304890234886311, 'samples': 19456512, 'steps': 38000, 'loss/train': 0.43644800782203674} +03/05/2022 10:08:04 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/05/2022 10:08:08 - INFO - codeparrot_training - Step 38001: {'lr': 0.000430485351505287, 'samples': 19457024, 'steps': 38001, 'loss/train': 2.2119436264038086} +03/05/2022 10:08:11 - INFO - codeparrot_training - Step 38002: {'lr': 0.000430481679440619, 'samples': 19457536, 'steps': 38002, 'loss/train': 1.445844054222107} +03/05/2022 10:08:12 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 10:08:16 - INFO - codeparrot_training - Step 38003: {'lr': 0.0004304780072946287, 'samples': 19458048, 'steps': 38003, 'loss/train': 1.1881942749023438} +03/05/2022 10:08:19 - INFO - codeparrot_training - Step 38004: {'lr': 0.00043047433506731783, 'samples': 19458560, 'steps': 38004, 'loss/train': 1.8692299127578735} +03/05/2022 10:08:21 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 10:08:25 - INFO - codeparrot_training - Step 38005: {'lr': 0.00043047066275868795, 'samples': 19459072, 'steps': 38005, 'loss/train': 1.516891360282898} +03/05/2022 10:08:28 - INFO - codeparrot_training - Step 38006: {'lr': 0.0004304669903687408, 'samples': 19459584, 'steps': 38006, 'loss/train': 1.7022420167922974} +03/05/2022 10:08:29 - INFO - codeparrot_training - Skipping example with length 39 (seq_length=1024) +03/05/2022 10:08:33 - INFO - codeparrot_training - Step 38007: {'lr': 0.000430463317897478, 'samples': 19460096, 'steps': 38007, 'loss/train': 0.8942084312438965} +03/05/2022 10:08:36 - INFO - codeparrot_training - Step 38008: {'lr': 0.0004304596453449012, 'samples': 19460608, 'steps': 38008, 'loss/train': 1.6661148071289062} +03/05/2022 10:08:38 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/05/2022 10:08:42 - INFO - codeparrot_training - Step 38009: {'lr': 0.0004304559727110121, 'samples': 19461120, 'steps': 38009, 'loss/train': 0.599915087223053} +03/05/2022 10:08:45 - INFO - codeparrot_training - Step 38010: {'lr': 0.0004304522999958124, 'samples': 19461632, 'steps': 38010, 'loss/train': 1.0640931129455566} +03/05/2022 10:08:46 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 10:08:50 - INFO - codeparrot_training - Step 38011: {'lr': 0.00043044862719930356, 'samples': 19462144, 'steps': 38011, 'loss/train': 2.4148948192596436} +03/05/2022 10:08:53 - INFO - codeparrot_training - Step 38012: {'lr': 0.0004304449543214874, 'samples': 19462656, 'steps': 38012, 'loss/train': 1.7904378175735474} +03/05/2022 10:08:55 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 10:08:58 - INFO - codeparrot_training - Step 38013: {'lr': 0.0004304412813623655, 'samples': 19463168, 'steps': 38013, 'loss/train': 1.827587366104126} +03/05/2022 10:09:02 - INFO - codeparrot_training - Step 38014: {'lr': 0.0004304376083219396, 'samples': 19463680, 'steps': 38014, 'loss/train': 2.2927942276000977} +03/05/2022 10:09:03 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/05/2022 10:09:07 - INFO - codeparrot_training - Step 38015: {'lr': 0.00043043393520021125, 'samples': 19464192, 'steps': 38015, 'loss/train': 2.10367488861084} +03/05/2022 10:09:10 - INFO - codeparrot_training - Step 38016: {'lr': 0.0004304302619971822, 'samples': 19464704, 'steps': 38016, 'loss/train': 2.2790417671203613} +03/05/2022 10:09:12 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/05/2022 10:09:15 - INFO - codeparrot_training - Step 38017: {'lr': 0.000430426588712854, 'samples': 19465216, 'steps': 38017, 'loss/train': 1.7684578895568848} +03/05/2022 10:09:18 - INFO - codeparrot_training - Step 38018: {'lr': 0.0004304229153472283, 'samples': 19465728, 'steps': 38018, 'loss/train': 1.3615593910217285} +03/05/2022 10:09:20 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 10:09:24 - INFO - codeparrot_training - Step 38019: {'lr': 0.0004304192419003069, 'samples': 19466240, 'steps': 38019, 'loss/train': 1.5945992469787598} +03/05/2022 10:09:27 - INFO - codeparrot_training - Step 38020: {'lr': 0.0004304155683720914, 'samples': 19466752, 'steps': 38020, 'loss/train': 1.4171319007873535} +03/05/2022 10:09:29 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/05/2022 10:09:32 - INFO - codeparrot_training - Step 38021: {'lr': 0.0004304118947625835, 'samples': 19467264, 'steps': 38021, 'loss/train': 1.90484619140625} +03/05/2022 10:09:36 - INFO - codeparrot_training - Step 38022: {'lr': 0.00043040822107178465, 'samples': 19467776, 'steps': 38022, 'loss/train': 1.8489912748336792} +03/05/2022 10:09:37 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 10:09:41 - INFO - codeparrot_training - Step 38023: {'lr': 0.0004304045472996966, 'samples': 19468288, 'steps': 38023, 'loss/train': 1.2445861101150513} +03/05/2022 10:09:44 - INFO - codeparrot_training - Step 38024: {'lr': 0.0004304008734463212, 'samples': 19468800, 'steps': 38024, 'loss/train': 1.8997186422348022} +03/05/2022 10:09:46 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 10:09:49 - INFO - codeparrot_training - Step 38025: {'lr': 0.00043039719951165986, 'samples': 19469312, 'steps': 38025, 'loss/train': 1.5180245637893677} +03/05/2022 10:09:52 - INFO - codeparrot_training - Step 38026: {'lr': 0.0004303935254957143, 'samples': 19469824, 'steps': 38026, 'loss/train': 1.5014299154281616} +03/05/2022 10:09:54 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 10:09:58 - INFO - codeparrot_training - Step 38027: {'lr': 0.0004303898513984863, 'samples': 19470336, 'steps': 38027, 'loss/train': 2.1215882301330566} +03/05/2022 10:10:01 - INFO - codeparrot_training - Step 38028: {'lr': 0.0004303861772199773, 'samples': 19470848, 'steps': 38028, 'loss/train': 1.5143530368804932} +03/05/2022 10:10:04 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/05/2022 10:10:07 - INFO - codeparrot_training - Step 38029: {'lr': 0.00043038250296018916, 'samples': 19471360, 'steps': 38029, 'loss/train': 1.985036015510559} +03/05/2022 10:10:10 - INFO - codeparrot_training - Step 38030: {'lr': 0.00043037882861912344, 'samples': 19471872, 'steps': 38030, 'loss/train': 1.5242153406143188} +03/05/2022 10:10:13 - INFO - codeparrot_training - Step 38031: {'lr': 0.00043037515419678174, 'samples': 19472384, 'steps': 38031, 'loss/train': 1.4357268810272217} +03/05/2022 10:10:13 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/05/2022 10:10:18 - INFO - codeparrot_training - Step 38032: {'lr': 0.0004303714796931658, 'samples': 19472896, 'steps': 38032, 'loss/train': 0.2413451075553894} +03/05/2022 10:10:21 - INFO - codeparrot_training - Step 38033: {'lr': 0.0004303678051082773, 'samples': 19473408, 'steps': 38033, 'loss/train': 2.067565679550171} +03/05/2022 10:10:21 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/05/2022 10:10:27 - INFO - codeparrot_training - Step 38034: {'lr': 0.00043036413044211786, 'samples': 19473920, 'steps': 38034, 'loss/train': 1.5207383632659912} +03/05/2022 10:10:30 - INFO - codeparrot_training - Step 38035: {'lr': 0.0004303604556946891, 'samples': 19474432, 'steps': 38035, 'loss/train': 1.1377378702163696} +03/05/2022 10:10:30 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 10:10:35 - INFO - codeparrot_training - Step 38036: {'lr': 0.00043035678086599265, 'samples': 19474944, 'steps': 38036, 'loss/train': 1.7207690477371216} +03/05/2022 10:10:38 - INFO - codeparrot_training - Step 38037: {'lr': 0.00043035310595603026, 'samples': 19475456, 'steps': 38037, 'loss/train': 1.878233790397644} +03/05/2022 10:10:39 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/05/2022 10:10:44 - INFO - codeparrot_training - Step 38038: {'lr': 0.00043034943096480357, 'samples': 19475968, 'steps': 38038, 'loss/train': 1.380743145942688} +03/05/2022 10:10:47 - INFO - codeparrot_training - Step 38039: {'lr': 0.0004303457558923142, 'samples': 19476480, 'steps': 38039, 'loss/train': 1.5217046737670898} +03/05/2022 10:10:48 - INFO - codeparrot_training - Skipping example with length 5 (seq_length=1024) +03/05/2022 10:10:52 - INFO - codeparrot_training - Step 38040: {'lr': 0.00043034208073856374, 'samples': 19476992, 'steps': 38040, 'loss/train': 2.0249781608581543} +03/05/2022 10:10:55 - INFO - codeparrot_training - Step 38041: {'lr': 0.000430338405503554, 'samples': 19477504, 'steps': 38041, 'loss/train': 2.0000715255737305} +03/05/2022 10:10:56 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/05/2022 10:11:01 - INFO - codeparrot_training - Step 38042: {'lr': 0.00043033473018728655, 'samples': 19478016, 'steps': 38042, 'loss/train': 1.8912651538848877} +03/05/2022 10:11:04 - INFO - codeparrot_training - Step 38043: {'lr': 0.00043033105478976306, 'samples': 19478528, 'steps': 38043, 'loss/train': 1.4762508869171143} +03/05/2022 10:11:04 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 10:11:09 - INFO - codeparrot_training - Step 38044: {'lr': 0.00043032737931098517, 'samples': 19479040, 'steps': 38044, 'loss/train': 2.386420965194702} +03/05/2022 10:11:12 - INFO - codeparrot_training - Step 38045: {'lr': 0.0004303237037509545, 'samples': 19479552, 'steps': 38045, 'loss/train': 0.9421160817146301} +03/05/2022 10:11:13 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 10:11:17 - INFO - codeparrot_training - Step 38046: {'lr': 0.0004303200281096727, 'samples': 19480064, 'steps': 38046, 'loss/train': 2.018425464630127} +03/05/2022 10:11:21 - INFO - codeparrot_training - Step 38047: {'lr': 0.00043031635238714163, 'samples': 19480576, 'steps': 38047, 'loss/train': 1.9391707181930542} +03/05/2022 10:11:21 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/05/2022 10:11:26 - INFO - codeparrot_training - Step 38048: {'lr': 0.00043031267658336276, 'samples': 19481088, 'steps': 38048, 'loss/train': 1.9107561111450195} +03/05/2022 10:11:29 - INFO - codeparrot_training - Step 38049: {'lr': 0.00043030900069833774, 'samples': 19481600, 'steps': 38049, 'loss/train': 1.5236433744430542} +03/05/2022 10:11:34 - INFO - codeparrot_training - Step 38050: {'lr': 0.0004303053247320683, 'samples': 19482112, 'steps': 38050, 'loss/train': 1.9727452993392944} +03/05/2022 10:11:37 - INFO - codeparrot_training - Step 38051: {'lr': 0.000430301648684556, 'samples': 19482624, 'steps': 38051, 'loss/train': 1.4848424196243286} +03/05/2022 10:11:38 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 10:11:43 - INFO - codeparrot_training - Step 38052: {'lr': 0.0004302979725558026, 'samples': 19483136, 'steps': 38052, 'loss/train': 1.338963270187378} +03/05/2022 10:11:46 - INFO - codeparrot_training - Step 38053: {'lr': 0.0004302942963458097, 'samples': 19483648, 'steps': 38053, 'loss/train': 1.6960563659667969} +03/05/2022 10:11:46 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 10:11:51 - INFO - codeparrot_training - Step 38054: {'lr': 0.00043029062005457897, 'samples': 19484160, 'steps': 38054, 'loss/train': 1.7523781061172485} +03/05/2022 10:11:54 - INFO - codeparrot_training - Step 38055: {'lr': 0.00043028694368211216, 'samples': 19484672, 'steps': 38055, 'loss/train': 1.1035007238388062} +03/05/2022 10:11:55 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 10:12:00 - INFO - codeparrot_training - Step 38056: {'lr': 0.00043028326722841073, 'samples': 19485184, 'steps': 38056, 'loss/train': 1.5697715282440186} +03/05/2022 10:12:03 - INFO - codeparrot_training - Step 38057: {'lr': 0.00043027959069347644, 'samples': 19485696, 'steps': 38057, 'loss/train': 1.475107192993164} +03/05/2022 10:12:03 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/05/2022 10:12:08 - INFO - codeparrot_training - Step 38058: {'lr': 0.00043027591407731106, 'samples': 19486208, 'steps': 38058, 'loss/train': 2.2208268642425537} +03/05/2022 10:12:11 - INFO - codeparrot_training - Step 38059: {'lr': 0.000430272237379916, 'samples': 19486720, 'steps': 38059, 'loss/train': 2.2744526863098145} +03/05/2022 10:12:11 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 10:12:16 - INFO - codeparrot_training - Step 38060: {'lr': 0.00043026856060129307, 'samples': 19487232, 'steps': 38060, 'loss/train': 2.045475959777832} +03/05/2022 10:12:20 - INFO - codeparrot_training - Step 38061: {'lr': 0.00043026488374144404, 'samples': 19487744, 'steps': 38061, 'loss/train': 1.6731675863265991} +03/05/2022 10:12:20 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/05/2022 10:12:25 - INFO - codeparrot_training - Step 38062: {'lr': 0.00043026120680037026, 'samples': 19488256, 'steps': 38062, 'loss/train': 2.1057446002960205} +03/05/2022 10:12:28 - INFO - codeparrot_training - Step 38063: {'lr': 0.00043025752977807365, 'samples': 19488768, 'steps': 38063, 'loss/train': 1.4114596843719482} +03/05/2022 10:12:28 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 10:12:33 - INFO - codeparrot_training - Step 38064: {'lr': 0.00043025385267455576, 'samples': 19489280, 'steps': 38064, 'loss/train': 1.6377941370010376} +03/05/2022 10:12:37 - INFO - codeparrot_training - Step 38065: {'lr': 0.0004302501754898183, 'samples': 19489792, 'steps': 38065, 'loss/train': 1.6610007286071777} +03/05/2022 10:12:37 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/05/2022 10:12:42 - INFO - codeparrot_training - Step 38066: {'lr': 0.00043024649822386284, 'samples': 19490304, 'steps': 38066, 'loss/train': 0.8098395466804504} +03/05/2022 10:12:45 - INFO - codeparrot_training - Step 38067: {'lr': 0.00043024282087669106, 'samples': 19490816, 'steps': 38067, 'loss/train': 0.6418740749359131} +03/05/2022 10:12:45 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 10:12:50 - INFO - codeparrot_training - Step 38068: {'lr': 0.0004302391434483048, 'samples': 19491328, 'steps': 38068, 'loss/train': 1.94647216796875} +03/05/2022 10:12:53 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/05/2022 10:12:56 - INFO - codeparrot_training - Step 38069: {'lr': 0.00043023546593870543, 'samples': 19491840, 'steps': 38069, 'loss/train': 1.2817102670669556} +03/05/2022 10:12:59 - INFO - codeparrot_training - Step 38070: {'lr': 0.00043023178834789477, 'samples': 19492352, 'steps': 38070, 'loss/train': 1.5265451669692993} +03/05/2022 10:13:02 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/05/2022 10:13:04 - INFO - codeparrot_training - Step 38071: {'lr': 0.0004302281106758745, 'samples': 19492864, 'steps': 38071, 'loss/train': 2.166799783706665} +03/05/2022 10:13:07 - INFO - codeparrot_training - Step 38072: {'lr': 0.00043022443292264613, 'samples': 19493376, 'steps': 38072, 'loss/train': 2.004671573638916} +03/05/2022 10:13:10 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 10:13:12 - INFO - codeparrot_training - Step 38073: {'lr': 0.00043022075508821145, 'samples': 19493888, 'steps': 38073, 'loss/train': 1.847042441368103} +03/05/2022 10:13:16 - INFO - codeparrot_training - Step 38074: {'lr': 0.0004302170771725721, 'samples': 19494400, 'steps': 38074, 'loss/train': 1.8174740076065063} +03/05/2022 10:13:18 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/05/2022 10:13:21 - INFO - codeparrot_training - Step 38075: {'lr': 0.0004302133991757297, 'samples': 19494912, 'steps': 38075, 'loss/train': 1.4159934520721436} +03/05/2022 10:13:24 - INFO - codeparrot_training - Step 38076: {'lr': 0.000430209721097686, 'samples': 19495424, 'steps': 38076, 'loss/train': 2.125657558441162} +03/05/2022 10:13:27 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 10:13:29 - INFO - codeparrot_training - Step 38077: {'lr': 0.00043020604293844244, 'samples': 19495936, 'steps': 38077, 'loss/train': 1.245300054550171} +03/05/2022 10:13:32 - INFO - codeparrot_training - Step 38078: {'lr': 0.0004302023646980009, 'samples': 19496448, 'steps': 38078, 'loss/train': 1.9822871685028076} +03/05/2022 10:13:35 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/05/2022 10:13:38 - INFO - codeparrot_training - Step 38079: {'lr': 0.00043019868637636294, 'samples': 19496960, 'steps': 38079, 'loss/train': 1.8194363117218018} +03/05/2022 10:13:41 - INFO - codeparrot_training - Step 38080: {'lr': 0.0004301950079735302, 'samples': 19497472, 'steps': 38080, 'loss/train': 2.4744622707366943} +03/05/2022 10:13:43 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/05/2022 10:13:46 - INFO - codeparrot_training - Step 38081: {'lr': 0.00043019132948950443, 'samples': 19497984, 'steps': 38081, 'loss/train': 1.3772854804992676} +03/05/2022 10:13:49 - INFO - codeparrot_training - Step 38082: {'lr': 0.0004301876509242872, 'samples': 19498496, 'steps': 38082, 'loss/train': 1.797467827796936} +03/05/2022 10:13:51 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/05/2022 10:13:55 - INFO - codeparrot_training - Step 38083: {'lr': 0.0004301839722778802, 'samples': 19499008, 'steps': 38083, 'loss/train': 1.9844164848327637} +03/05/2022 10:13:58 - INFO - codeparrot_training - Step 38084: {'lr': 0.0004301802935502851, 'samples': 19499520, 'steps': 38084, 'loss/train': 1.7103025913238525} +03/05/2022 10:14:00 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 10:14:03 - INFO - codeparrot_training - Step 38085: {'lr': 0.00043017661474150347, 'samples': 19500032, 'steps': 38085, 'loss/train': 2.346693515777588} +03/05/2022 10:14:06 - INFO - codeparrot_training - Step 38086: {'lr': 0.0004301729358515371, 'samples': 19500544, 'steps': 38086, 'loss/train': 1.3317945003509521} +03/05/2022 10:14:08 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/05/2022 10:14:12 - INFO - codeparrot_training - Step 38087: {'lr': 0.00043016925688038756, 'samples': 19501056, 'steps': 38087, 'loss/train': 1.9408619403839111} +03/05/2022 10:14:15 - INFO - codeparrot_training - Step 38088: {'lr': 0.00043016557782805655, 'samples': 19501568, 'steps': 38088, 'loss/train': 1.227591872215271} +03/05/2022 10:14:17 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 10:14:20 - INFO - codeparrot_training - Step 38089: {'lr': 0.0004301618986945457, 'samples': 19502080, 'steps': 38089, 'loss/train': 1.296087384223938} +03/05/2022 10:14:23 - INFO - codeparrot_training - Step 38090: {'lr': 0.0004301582194798567, 'samples': 19502592, 'steps': 38090, 'loss/train': 1.4857016801834106} +03/05/2022 10:14:25 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/05/2022 10:14:28 - INFO - codeparrot_training - Step 38091: {'lr': 0.00043015454018399115, 'samples': 19503104, 'steps': 38091, 'loss/train': 1.1398205757141113} +03/05/2022 10:14:32 - INFO - codeparrot_training - Step 38092: {'lr': 0.00043015086080695075, 'samples': 19503616, 'steps': 38092, 'loss/train': 1.7836833000183105} +03/05/2022 10:14:33 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/05/2022 10:14:37 - INFO - codeparrot_training - Step 38093: {'lr': 0.0004301471813487372, 'samples': 19504128, 'steps': 38093, 'loss/train': 2.334439277648926} +03/05/2022 10:14:40 - INFO - codeparrot_training - Step 38094: {'lr': 0.00043014350180935207, 'samples': 19504640, 'steps': 38094, 'loss/train': 1.0010154247283936} +03/05/2022 10:14:42 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 10:14:45 - INFO - codeparrot_training - Step 38095: {'lr': 0.0004301398221887971, 'samples': 19505152, 'steps': 38095, 'loss/train': 1.8115538358688354} +03/05/2022 10:14:49 - INFO - codeparrot_training - Step 38096: {'lr': 0.0004301361424870739, 'samples': 19505664, 'steps': 38096, 'loss/train': 2.4744937419891357} +03/05/2022 10:14:50 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 10:14:54 - INFO - codeparrot_training - Step 38097: {'lr': 0.00043013246270418406, 'samples': 19506176, 'steps': 38097, 'loss/train': 1.8701446056365967} +03/05/2022 10:14:57 - INFO - codeparrot_training - Step 38098: {'lr': 0.00043012878284012936, 'samples': 19506688, 'steps': 38098, 'loss/train': 0.5405595302581787} +03/05/2022 10:14:59 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 10:15:02 - INFO - codeparrot_training - Step 38099: {'lr': 0.0004301251028949114, 'samples': 19507200, 'steps': 38099, 'loss/train': 1.9475120306015015} +03/05/2022 10:15:06 - INFO - codeparrot_training - Step 38100: {'lr': 0.00043012142286853185, 'samples': 19507712, 'steps': 38100, 'loss/train': 2.3827965259552} +03/05/2022 10:15:07 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/05/2022 10:15:11 - INFO - codeparrot_training - Step 38101: {'lr': 0.00043011774276099235, 'samples': 19508224, 'steps': 38101, 'loss/train': 0.4602147042751312} +03/05/2022 10:15:14 - INFO - codeparrot_training - Step 38102: {'lr': 0.0004301140625722946, 'samples': 19508736, 'steps': 38102, 'loss/train': 1.29271399974823} +03/05/2022 10:15:16 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 10:15:19 - INFO - codeparrot_training - Step 38103: {'lr': 0.0004301103823024403, 'samples': 19509248, 'steps': 38103, 'loss/train': 0.7816430330276489} +03/05/2022 10:15:23 - INFO - codeparrot_training - Step 38104: {'lr': 0.0004301067019514309, 'samples': 19509760, 'steps': 38104, 'loss/train': 1.677298903465271} +03/05/2022 10:15:24 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 10:15:28 - INFO - codeparrot_training - Step 38105: {'lr': 0.0004301030215192683, 'samples': 19510272, 'steps': 38105, 'loss/train': 1.1386585235595703} +03/05/2022 10:15:31 - INFO - codeparrot_training - Step 38106: {'lr': 0.00043009934100595403, 'samples': 19510784, 'steps': 38106, 'loss/train': 2.16165828704834} +03/05/2022 10:15:33 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 10:15:36 - INFO - codeparrot_training - Step 38107: {'lr': 0.00043009566041148973, 'samples': 19511296, 'steps': 38107, 'loss/train': 2.050820827484131} +03/05/2022 10:15:40 - INFO - codeparrot_training - Step 38108: {'lr': 0.0004300919797358772, 'samples': 19511808, 'steps': 38108, 'loss/train': 2.3823142051696777} +03/05/2022 10:15:41 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/05/2022 10:15:45 - INFO - codeparrot_training - Step 38109: {'lr': 0.00043008829897911796, 'samples': 19512320, 'steps': 38109, 'loss/train': 1.2240188121795654} +03/05/2022 10:15:48 - INFO - codeparrot_training - Step 38110: {'lr': 0.0004300846181412137, 'samples': 19512832, 'steps': 38110, 'loss/train': 1.2134724855422974} +03/05/2022 10:15:50 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 10:15:53 - INFO - codeparrot_training - Step 38111: {'lr': 0.00043008093722216603, 'samples': 19513344, 'steps': 38111, 'loss/train': 1.4284002780914307} +03/05/2022 10:15:56 - INFO - codeparrot_training - Step 38112: {'lr': 0.00043007725622197675, 'samples': 19513856, 'steps': 38112, 'loss/train': 2.0763683319091797} +03/05/2022 10:15:58 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 10:16:02 - INFO - codeparrot_training - Step 38113: {'lr': 0.0004300735751406474, 'samples': 19514368, 'steps': 38113, 'loss/train': 1.9253026247024536} +03/05/2022 10:16:05 - INFO - codeparrot_training - Step 38114: {'lr': 0.00043006989397817967, 'samples': 19514880, 'steps': 38114, 'loss/train': 1.320953369140625} +03/05/2022 10:16:06 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/05/2022 10:16:10 - INFO - codeparrot_training - Step 38115: {'lr': 0.00043006621273457523, 'samples': 19515392, 'steps': 38115, 'loss/train': 1.3291610479354858} +03/05/2022 10:16:13 - INFO - codeparrot_training - Step 38116: {'lr': 0.0004300625314098358, 'samples': 19515904, 'steps': 38116, 'loss/train': 1.5463697910308838} +03/05/2022 10:16:15 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/05/2022 10:16:19 - INFO - codeparrot_training - Step 38117: {'lr': 0.0004300588500039629, 'samples': 19516416, 'steps': 38117, 'loss/train': 1.4944273233413696} +03/05/2022 10:16:22 - INFO - codeparrot_training - Step 38118: {'lr': 0.0004300551685169583, 'samples': 19516928, 'steps': 38118, 'loss/train': 1.9958827495574951} +03/05/2022 10:16:23 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 10:16:27 - INFO - codeparrot_training - Step 38119: {'lr': 0.0004300514869488236, 'samples': 19517440, 'steps': 38119, 'loss/train': 1.5339068174362183} +03/05/2022 10:16:30 - INFO - codeparrot_training - Step 38120: {'lr': 0.00043004780529956046, 'samples': 19517952, 'steps': 38120, 'loss/train': 1.806308388710022} +03/05/2022 10:16:31 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 10:16:35 - INFO - codeparrot_training - Step 38121: {'lr': 0.00043004412356917055, 'samples': 19518464, 'steps': 38121, 'loss/train': 2.0159950256347656} +03/05/2022 10:16:38 - INFO - codeparrot_training - Step 38122: {'lr': 0.0004300404417576556, 'samples': 19518976, 'steps': 38122, 'loss/train': 1.9803131818771362} +03/05/2022 10:16:40 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/05/2022 10:16:44 - INFO - codeparrot_training - Step 38123: {'lr': 0.00043003675986501717, 'samples': 19519488, 'steps': 38123, 'loss/train': 1.88717782497406} +03/05/2022 10:16:47 - INFO - codeparrot_training - Step 38124: {'lr': 0.00043003307789125694, 'samples': 19520000, 'steps': 38124, 'loss/train': 2.2744219303131104} +03/05/2022 10:16:48 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/05/2022 10:16:52 - INFO - codeparrot_training - Step 38125: {'lr': 0.0004300293958363766, 'samples': 19520512, 'steps': 38125, 'loss/train': 1.4435806274414062} +03/05/2022 10:16:55 - INFO - codeparrot_training - Step 38126: {'lr': 0.00043002571370037777, 'samples': 19521024, 'steps': 38126, 'loss/train': 1.696510910987854} +03/05/2022 10:16:57 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/05/2022 10:17:01 - INFO - codeparrot_training - Step 38127: {'lr': 0.00043002203148326213, 'samples': 19521536, 'steps': 38127, 'loss/train': 1.5090446472167969} +03/05/2022 10:17:04 - INFO - codeparrot_training - Step 38128: {'lr': 0.0004300183491850314, 'samples': 19522048, 'steps': 38128, 'loss/train': 2.0730791091918945} +03/05/2022 10:17:05 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/05/2022 10:17:10 - INFO - codeparrot_training - Step 38129: {'lr': 0.0004300146668056871, 'samples': 19522560, 'steps': 38129, 'loss/train': 1.605167031288147} +03/05/2022 10:17:13 - INFO - codeparrot_training - Step 38130: {'lr': 0.00043001098434523107, 'samples': 19523072, 'steps': 38130, 'loss/train': 2.2721474170684814} +03/05/2022 10:17:15 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 10:17:18 - INFO - codeparrot_training - Step 38131: {'lr': 0.0004300073018036648, 'samples': 19523584, 'steps': 38131, 'loss/train': 2.1520752906799316} +03/05/2022 10:17:21 - INFO - codeparrot_training - Step 38132: {'lr': 0.00043000361918099, 'samples': 19524096, 'steps': 38132, 'loss/train': 1.0600160360336304} +03/05/2022 10:17:23 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 10:17:26 - INFO - codeparrot_training - Step 38133: {'lr': 0.00042999993647720836, 'samples': 19524608, 'steps': 38133, 'loss/train': 1.5030423402786255} +03/05/2022 10:17:30 - INFO - codeparrot_training - Step 38134: {'lr': 0.0004299962536923215, 'samples': 19525120, 'steps': 38134, 'loss/train': 1.732623815536499} +03/05/2022 10:17:31 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/05/2022 10:17:35 - INFO - codeparrot_training - Step 38135: {'lr': 0.0004299925708263312, 'samples': 19525632, 'steps': 38135, 'loss/train': 1.343214988708496} +03/05/2022 10:17:38 - INFO - codeparrot_training - Step 38136: {'lr': 0.00042998888787923895, 'samples': 19526144, 'steps': 38136, 'loss/train': 1.5132626295089722} +03/05/2022 10:17:40 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/05/2022 10:17:43 - INFO - codeparrot_training - Step 38137: {'lr': 0.0004299852048510465, 'samples': 19526656, 'steps': 38137, 'loss/train': 1.7135881185531616} +03/05/2022 10:17:46 - INFO - codeparrot_training - Step 38138: {'lr': 0.00042998152174175555, 'samples': 19527168, 'steps': 38138, 'loss/train': 1.5525445938110352} +03/05/2022 10:17:48 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 10:17:52 - INFO - codeparrot_training - Step 38139: {'lr': 0.0004299778385513676, 'samples': 19527680, 'steps': 38139, 'loss/train': 2.096250534057617} +03/05/2022 10:17:55 - INFO - codeparrot_training - Step 38140: {'lr': 0.0004299741552798845, 'samples': 19528192, 'steps': 38140, 'loss/train': 0.9714369773864746} +03/05/2022 10:17:56 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/05/2022 10:18:00 - INFO - codeparrot_training - Step 38141: {'lr': 0.0004299704719273078, 'samples': 19528704, 'steps': 38141, 'loss/train': 1.9438979625701904} +03/05/2022 10:18:03 - INFO - codeparrot_training - Step 38142: {'lr': 0.00042996678849363914, 'samples': 19529216, 'steps': 38142, 'loss/train': 1.6587756872177124} +03/05/2022 10:18:05 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 10:18:09 - INFO - codeparrot_training - Step 38143: {'lr': 0.00042996310497888025, 'samples': 19529728, 'steps': 38143, 'loss/train': 0.6900030374526978} +03/05/2022 10:18:12 - INFO - codeparrot_training - Step 38144: {'lr': 0.00042995942138303274, 'samples': 19530240, 'steps': 38144, 'loss/train': 1.8848631381988525} +03/05/2022 10:18:13 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 10:18:17 - INFO - codeparrot_training - Step 38145: {'lr': 0.0004299557377060983, 'samples': 19530752, 'steps': 38145, 'loss/train': 1.169919490814209} +03/05/2022 10:18:21 - INFO - codeparrot_training - Step 38146: {'lr': 0.00042995205394807864, 'samples': 19531264, 'steps': 38146, 'loss/train': 0.8233879804611206} +03/05/2022 10:18:23 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/05/2022 10:18:26 - INFO - codeparrot_training - Step 38147: {'lr': 0.00042994837010897524, 'samples': 19531776, 'steps': 38147, 'loss/train': 2.076493501663208} +03/05/2022 10:18:29 - INFO - codeparrot_training - Step 38148: {'lr': 0.00042994468618879, 'samples': 19532288, 'steps': 38148, 'loss/train': 2.4107954502105713} +03/05/2022 10:18:32 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 10:18:35 - INFO - codeparrot_training - Step 38149: {'lr': 0.0004299410021875244, 'samples': 19532800, 'steps': 38149, 'loss/train': 1.5858122110366821} +03/05/2022 10:18:38 - INFO - codeparrot_training - Step 38150: {'lr': 0.00042993731810518025, 'samples': 19533312, 'steps': 38150, 'loss/train': 1.7968353033065796} +03/05/2022 10:18:40 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/05/2022 10:18:43 - INFO - codeparrot_training - Step 38151: {'lr': 0.00042993363394175897, 'samples': 19533824, 'steps': 38151, 'loss/train': 1.3285233974456787} +03/05/2022 10:18:46 - INFO - codeparrot_training - Step 38152: {'lr': 0.0004299299496972625, 'samples': 19534336, 'steps': 38152, 'loss/train': 2.783716917037964} +03/05/2022 10:18:49 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 10:18:51 - INFO - codeparrot_training - Step 38153: {'lr': 0.0004299262653716923, 'samples': 19534848, 'steps': 38153, 'loss/train': 1.533878207206726} +03/05/2022 10:18:55 - INFO - codeparrot_training - Step 38154: {'lr': 0.0004299225809650501, 'samples': 19535360, 'steps': 38154, 'loss/train': 1.8411775827407837} +03/05/2022 10:18:57 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/05/2022 10:19:00 - INFO - codeparrot_training - Step 38155: {'lr': 0.0004299188964773376, 'samples': 19535872, 'steps': 38155, 'loss/train': 1.5124545097351074} +03/05/2022 10:19:03 - INFO - codeparrot_training - Step 38156: {'lr': 0.0004299152119085564, 'samples': 19536384, 'steps': 38156, 'loss/train': 1.3971705436706543} +03/05/2022 10:19:06 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/05/2022 10:19:08 - INFO - codeparrot_training - Step 38157: {'lr': 0.0004299115272587082, 'samples': 19536896, 'steps': 38157, 'loss/train': 2.054316520690918} +03/05/2022 10:19:11 - INFO - codeparrot_training - Step 38158: {'lr': 0.0004299078425277947, 'samples': 19537408, 'steps': 38158, 'loss/train': 1.35747230052948} +03/05/2022 10:19:14 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/05/2022 10:19:17 - INFO - codeparrot_training - Step 38159: {'lr': 0.00042990415771581734, 'samples': 19537920, 'steps': 38159, 'loss/train': 1.604402780532837} +03/05/2022 10:19:20 - INFO - codeparrot_training - Step 38160: {'lr': 0.0004299004728227781, 'samples': 19538432, 'steps': 38160, 'loss/train': 1.2720932960510254} +03/05/2022 10:19:23 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 10:19:25 - INFO - codeparrot_training - Step 38161: {'lr': 0.0004298967878486784, 'samples': 19538944, 'steps': 38161, 'loss/train': 1.8828601837158203} +03/05/2022 10:19:28 - INFO - codeparrot_training - Step 38162: {'lr': 0.00042989310279352, 'samples': 19539456, 'steps': 38162, 'loss/train': 1.9576854705810547} +03/05/2022 10:19:31 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/05/2022 10:19:34 - INFO - codeparrot_training - Step 38163: {'lr': 0.0004298894176573046, 'samples': 19539968, 'steps': 38163, 'loss/train': 2.1857919692993164} +03/05/2022 10:19:37 - INFO - codeparrot_training - Step 38164: {'lr': 0.0004298857324400337, 'samples': 19540480, 'steps': 38164, 'loss/train': 1.766544222831726} +03/05/2022 10:19:39 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 10:19:42 - INFO - codeparrot_training - Step 38165: {'lr': 0.0004298820471417091, 'samples': 19540992, 'steps': 38165, 'loss/train': 2.026407480239868} +03/05/2022 10:19:45 - INFO - codeparrot_training - Step 38166: {'lr': 0.00042987836176233246, 'samples': 19541504, 'steps': 38166, 'loss/train': 1.0265092849731445} +03/05/2022 10:19:48 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 10:19:51 - INFO - codeparrot_training - Step 38167: {'lr': 0.0004298746763019054, 'samples': 19542016, 'steps': 38167, 'loss/train': 1.5174959897994995} +03/05/2022 10:19:54 - INFO - codeparrot_training - Step 38168: {'lr': 0.0004298709907604296, 'samples': 19542528, 'steps': 38168, 'loss/train': 0.5158788561820984} +03/05/2022 10:19:56 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/05/2022 10:19:59 - INFO - codeparrot_training - Step 38169: {'lr': 0.0004298673051379066, 'samples': 19543040, 'steps': 38169, 'loss/train': 1.688212513923645} +03/05/2022 10:20:02 - INFO - codeparrot_training - Step 38170: {'lr': 0.0004298636194343383, 'samples': 19543552, 'steps': 38170, 'loss/train': 1.5990475416183472} +03/05/2022 10:20:05 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/05/2022 10:20:08 - INFO - codeparrot_training - Step 38171: {'lr': 0.0004298599336497262, 'samples': 19544064, 'steps': 38171, 'loss/train': 2.0844388008117676} +03/05/2022 10:20:11 - INFO - codeparrot_training - Step 38172: {'lr': 0.00042985624778407196, 'samples': 19544576, 'steps': 38172, 'loss/train': 1.901390552520752} +03/05/2022 10:20:13 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 10:20:16 - INFO - codeparrot_training - Step 38173: {'lr': 0.00042985256183737723, 'samples': 19545088, 'steps': 38173, 'loss/train': 1.5261445045471191} +03/05/2022 10:20:19 - INFO - codeparrot_training - Step 38174: {'lr': 0.00042984887580964376, 'samples': 19545600, 'steps': 38174, 'loss/train': 2.038256883621216} +03/05/2022 10:20:22 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/05/2022 10:20:25 - INFO - codeparrot_training - Step 38175: {'lr': 0.00042984518970087316, 'samples': 19546112, 'steps': 38175, 'loss/train': 1.355778455734253} +03/05/2022 10:20:28 - INFO - codeparrot_training - Step 38176: {'lr': 0.0004298415035110671, 'samples': 19546624, 'steps': 38176, 'loss/train': 2.02291202545166} +03/05/2022 10:20:31 - INFO - codeparrot_training - Step 38177: {'lr': 0.00042983781724022723, 'samples': 19547136, 'steps': 38177, 'loss/train': 0.4312288165092468} +03/05/2022 10:20:31 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/05/2022 10:20:36 - INFO - codeparrot_training - Step 38178: {'lr': 0.0004298341308883552, 'samples': 19547648, 'steps': 38178, 'loss/train': 1.4607990980148315} +03/05/2022 10:20:40 - INFO - codeparrot_training - Step 38179: {'lr': 0.0004298304444554527, 'samples': 19548160, 'steps': 38179, 'loss/train': 1.7812511920928955} +03/05/2022 10:20:40 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 10:20:45 - INFO - codeparrot_training - Step 38180: {'lr': 0.00042982675794152135, 'samples': 19548672, 'steps': 38180, 'loss/train': 1.5163365602493286} +03/05/2022 10:20:48 - INFO - codeparrot_training - Step 38181: {'lr': 0.0004298230713465629, 'samples': 19549184, 'steps': 38181, 'loss/train': 0.7158980965614319} +03/05/2022 10:20:48 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/05/2022 10:20:53 - INFO - codeparrot_training - Step 38182: {'lr': 0.00042981938467057893, 'samples': 19549696, 'steps': 38182, 'loss/train': 1.9785288572311401} +03/05/2022 10:20:56 - INFO - codeparrot_training - Step 38183: {'lr': 0.0004298156979135711, 'samples': 19550208, 'steps': 38183, 'loss/train': 1.6510062217712402} +03/05/2022 10:20:56 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/05/2022 10:21:01 - INFO - codeparrot_training - Step 38184: {'lr': 0.000429812011075541, 'samples': 19550720, 'steps': 38184, 'loss/train': 2.0987019538879395} +03/05/2022 10:21:05 - INFO - codeparrot_training - Step 38185: {'lr': 0.0004298083241564905, 'samples': 19551232, 'steps': 38185, 'loss/train': 0.8264381289482117} +03/05/2022 10:21:05 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/05/2022 10:21:10 - INFO - codeparrot_training - Step 38186: {'lr': 0.00042980463715642115, 'samples': 19551744, 'steps': 38186, 'loss/train': 1.7903589010238647} +03/05/2022 10:21:14 - INFO - codeparrot_training - Step 38187: {'lr': 0.0004298009500753346, 'samples': 19552256, 'steps': 38187, 'loss/train': 3.2278077602386475} +03/05/2022 10:21:15 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/05/2022 10:21:19 - INFO - codeparrot_training - Step 38188: {'lr': 0.00042979726291323246, 'samples': 19552768, 'steps': 38188, 'loss/train': 1.6065139770507812} +03/05/2022 10:21:22 - INFO - codeparrot_training - Step 38189: {'lr': 0.00042979357567011643, 'samples': 19553280, 'steps': 38189, 'loss/train': 0.9445589184761047} +03/05/2022 10:21:23 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/05/2022 10:21:27 - INFO - codeparrot_training - Step 38190: {'lr': 0.0004297898883459883, 'samples': 19553792, 'steps': 38190, 'loss/train': 2.360586643218994} +03/05/2022 10:21:31 - INFO - codeparrot_training - Step 38191: {'lr': 0.00042978620094084955, 'samples': 19554304, 'steps': 38191, 'loss/train': 1.8124327659606934} +03/05/2022 10:21:32 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 10:21:36 - INFO - codeparrot_training - Step 38192: {'lr': 0.00042978251345470185, 'samples': 19554816, 'steps': 38192, 'loss/train': 1.7657251358032227} +03/05/2022 10:21:39 - INFO - codeparrot_training - Step 38193: {'lr': 0.000429778825887547, 'samples': 19555328, 'steps': 38193, 'loss/train': 2.0249710083007812} +03/05/2022 10:21:40 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/05/2022 10:21:44 - INFO - codeparrot_training - Step 38194: {'lr': 0.00042977513823938665, 'samples': 19555840, 'steps': 38194, 'loss/train': 1.8225529193878174} +03/05/2022 10:21:48 - INFO - codeparrot_training - Step 38195: {'lr': 0.00042977145051022224, 'samples': 19556352, 'steps': 38195, 'loss/train': 1.400360345840454} +03/05/2022 10:21:49 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/05/2022 10:21:53 - INFO - codeparrot_training - Step 38196: {'lr': 0.0004297677627000557, 'samples': 19556864, 'steps': 38196, 'loss/train': 2.1804728507995605} +03/05/2022 10:21:56 - INFO - codeparrot_training - Step 38197: {'lr': 0.0004297640748088886, 'samples': 19557376, 'steps': 38197, 'loss/train': 1.9173269271850586} +03/05/2022 10:21:57 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/05/2022 10:22:01 - INFO - codeparrot_training - Step 38198: {'lr': 0.0004297603868367225, 'samples': 19557888, 'steps': 38198, 'loss/train': 0.14692805707454681} +03/05/2022 10:22:04 - INFO - codeparrot_training - Step 38199: {'lr': 0.00042975669878355917, 'samples': 19558400, 'steps': 38199, 'loss/train': 1.710673213005066} +03/05/2022 10:22:06 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/05/2022 10:22:10 - INFO - codeparrot_training - Step 38200: {'lr': 0.00042975301064940026, 'samples': 19558912, 'steps': 38200, 'loss/train': 2.148225784301758} +03/05/2022 10:22:13 - INFO - codeparrot_training - Step 38201: {'lr': 0.00042974932243424743, 'samples': 19559424, 'steps': 38201, 'loss/train': 1.3002058267593384} +03/05/2022 10:22:14 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/05/2022 10:22:18 - INFO - codeparrot_training - Step 38202: {'lr': 0.0004297456341381023, 'samples': 19559936, 'steps': 38202, 'loss/train': 1.3662004470825195} +03/05/2022 10:22:22 - INFO - codeparrot_training - Step 38203: {'lr': 0.0004297419457609666, 'samples': 19560448, 'steps': 38203, 'loss/train': 1.8153409957885742} +03/05/2022 10:22:22 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 10:22:27 - INFO - codeparrot_training - Step 38204: {'lr': 0.0004297382573028419, 'samples': 19560960, 'steps': 38204, 'loss/train': 2.665128707885742} +03/05/2022 10:22:30 - INFO - codeparrot_training - Step 38205: {'lr': 0.0004297345687637299, 'samples': 19561472, 'steps': 38205, 'loss/train': 2.403273344039917} +03/05/2022 10:22:31 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 10:22:35 - INFO - codeparrot_training - Step 38206: {'lr': 0.00042973088014363237, 'samples': 19561984, 'steps': 38206, 'loss/train': 2.2563891410827637} +03/05/2022 10:22:38 - INFO - codeparrot_training - Step 38207: {'lr': 0.0004297271914425508, 'samples': 19562496, 'steps': 38207, 'loss/train': 1.6115695238113403} +03/05/2022 10:22:39 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/05/2022 10:22:44 - INFO - codeparrot_training - Step 38208: {'lr': 0.00042972350266048693, 'samples': 19563008, 'steps': 38208, 'loss/train': 2.0249693393707275} +03/05/2022 10:22:47 - INFO - codeparrot_training - Step 38209: {'lr': 0.0004297198137974425, 'samples': 19563520, 'steps': 38209, 'loss/train': 1.094769835472107} +03/05/2022 10:22:48 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/05/2022 10:22:52 - INFO - codeparrot_training - Step 38210: {'lr': 0.00042971612485341896, 'samples': 19564032, 'steps': 38210, 'loss/train': 3.092541456222534} +03/05/2022 10:22:55 - INFO - codeparrot_training - Step 38211: {'lr': 0.00042971243582841823, 'samples': 19564544, 'steps': 38211, 'loss/train': 1.446276068687439} +03/05/2022 10:22:56 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/05/2022 10:23:01 - INFO - codeparrot_training - Step 38212: {'lr': 0.0004297087467224418, 'samples': 19565056, 'steps': 38212, 'loss/train': 1.038641333580017} +03/05/2022 10:23:04 - INFO - codeparrot_training - Step 38213: {'lr': 0.0004297050575354914, 'samples': 19565568, 'steps': 38213, 'loss/train': 1.6101264953613281} +03/05/2022 10:23:04 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 10:23:09 - INFO - codeparrot_training - Step 38214: {'lr': 0.0004297013682675687, 'samples': 19566080, 'steps': 38214, 'loss/train': 2.107543706893921} +03/05/2022 10:23:12 - INFO - codeparrot_training - Step 38215: {'lr': 0.0004296976789186753, 'samples': 19566592, 'steps': 38215, 'loss/train': 1.2800267934799194} +03/05/2022 10:23:13 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/05/2022 10:23:17 - INFO - codeparrot_training - Step 38216: {'lr': 0.00042969398948881286, 'samples': 19567104, 'steps': 38216, 'loss/train': 2.109590530395508} +03/05/2022 10:23:21 - INFO - codeparrot_training - Step 38217: {'lr': 0.00042969029997798314, 'samples': 19567616, 'steps': 38217, 'loss/train': 0.638293445110321} +03/05/2022 10:23:21 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/05/2022 10:23:26 - INFO - codeparrot_training - Step 38218: {'lr': 0.00042968661038618775, 'samples': 19568128, 'steps': 38218, 'loss/train': 1.866135597229004} +03/05/2022 10:23:29 - INFO - codeparrot_training - Step 38219: {'lr': 0.0004296829207134283, 'samples': 19568640, 'steps': 38219, 'loss/train': 1.626603126525879} +03/05/2022 10:23:30 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 10:23:35 - INFO - codeparrot_training - Step 38220: {'lr': 0.0004296792309597065, 'samples': 19569152, 'steps': 38220, 'loss/train': 1.5221202373504639} +03/05/2022 10:23:38 - INFO - codeparrot_training - Step 38221: {'lr': 0.00042967554112502404, 'samples': 19569664, 'steps': 38221, 'loss/train': 1.7357590198516846} +03/05/2022 10:23:39 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 10:23:43 - INFO - codeparrot_training - Step 38222: {'lr': 0.00042967185120938256, 'samples': 19570176, 'steps': 38222, 'loss/train': 1.359134554862976} +03/05/2022 10:23:46 - INFO - codeparrot_training - Step 38223: {'lr': 0.00042966816121278365, 'samples': 19570688, 'steps': 38223, 'loss/train': 1.3038883209228516} +03/05/2022 10:23:47 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/05/2022 10:23:51 - INFO - codeparrot_training - Step 38224: {'lr': 0.0004296644711352291, 'samples': 19571200, 'steps': 38224, 'loss/train': 1.785362958908081} +03/05/2022 10:23:55 - INFO - codeparrot_training - Step 38225: {'lr': 0.0004296607809767205, 'samples': 19571712, 'steps': 38225, 'loss/train': 2.0929696559906006} +03/05/2022 10:23:56 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/05/2022 10:24:00 - INFO - codeparrot_training - Step 38226: {'lr': 0.00042965709073725957, 'samples': 19572224, 'steps': 38226, 'loss/train': 2.1142191886901855} +03/05/2022 10:24:03 - INFO - codeparrot_training - Step 38227: {'lr': 0.00042965340041684785, 'samples': 19572736, 'steps': 38227, 'loss/train': 2.672612190246582} +03/05/2022 10:24:04 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/05/2022 10:24:08 - INFO - codeparrot_training - Step 38228: {'lr': 0.00042964971001548715, 'samples': 19573248, 'steps': 38228, 'loss/train': 2.0597496032714844} +03/05/2022 10:24:11 - INFO - codeparrot_training - Step 38229: {'lr': 0.00042964601953317895, 'samples': 19573760, 'steps': 38229, 'loss/train': 1.2374943494796753} +03/05/2022 10:24:12 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 10:24:17 - INFO - codeparrot_training - Step 38230: {'lr': 0.0004296423289699252, 'samples': 19574272, 'steps': 38230, 'loss/train': 1.5440378189086914} +03/05/2022 10:24:20 - INFO - codeparrot_training - Step 38231: {'lr': 0.00042963863832572727, 'samples': 19574784, 'steps': 38231, 'loss/train': 2.032487154006958} +03/05/2022 10:24:20 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 10:24:25 - INFO - codeparrot_training - Step 38232: {'lr': 0.0004296349476005869, 'samples': 19575296, 'steps': 38232, 'loss/train': 1.525044322013855} +03/05/2022 10:24:28 - INFO - codeparrot_training - Step 38233: {'lr': 0.0004296312567945059, 'samples': 19575808, 'steps': 38233, 'loss/train': 1.6452269554138184} +03/05/2022 10:24:30 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 10:24:34 - INFO - codeparrot_training - Step 38234: {'lr': 0.0004296275659074858, 'samples': 19576320, 'steps': 38234, 'loss/train': 1.5290149450302124} +03/05/2022 10:24:37 - INFO - codeparrot_training - Step 38235: {'lr': 0.00042962387493952823, 'samples': 19576832, 'steps': 38235, 'loss/train': 1.7374461889266968} +03/05/2022 10:24:40 - INFO - codeparrot_training - Step 38236: {'lr': 0.00042962018389063495, 'samples': 19577344, 'steps': 38236, 'loss/train': 1.8442904949188232} +03/05/2022 10:24:41 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/05/2022 10:24:46 - INFO - codeparrot_training - Step 38237: {'lr': 0.0004296164927608076, 'samples': 19577856, 'steps': 38237, 'loss/train': 2.214616537094116} +03/05/2022 10:24:49 - INFO - codeparrot_training - Step 38238: {'lr': 0.00042961280155004786, 'samples': 19578368, 'steps': 38238, 'loss/train': 2.2062056064605713} +03/05/2022 10:24:49 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 10:24:54 - INFO - codeparrot_training - Step 38239: {'lr': 0.0004296091102583573, 'samples': 19578880, 'steps': 38239, 'loss/train': 2.2183449268341064} +03/05/2022 10:24:57 - INFO - codeparrot_training - Step 38240: {'lr': 0.0004296054188857377, 'samples': 19579392, 'steps': 38240, 'loss/train': 2.102518081665039} +03/05/2022 10:24:58 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 10:25:03 - INFO - codeparrot_training - Step 38241: {'lr': 0.0004296017274321906, 'samples': 19579904, 'steps': 38241, 'loss/train': 0.6505129933357239} +03/05/2022 10:25:06 - INFO - codeparrot_training - Step 38242: {'lr': 0.0004295980358977178, 'samples': 19580416, 'steps': 38242, 'loss/train': 1.6552292108535767} +03/05/2022 10:25:06 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 10:25:11 - INFO - codeparrot_training - Step 38243: {'lr': 0.0004295943442823209, 'samples': 19580928, 'steps': 38243, 'loss/train': 2.0696794986724854} +03/05/2022 10:25:14 - INFO - codeparrot_training - Step 38244: {'lr': 0.0004295906525860015, 'samples': 19581440, 'steps': 38244, 'loss/train': 1.598634123802185} +03/05/2022 10:25:14 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/05/2022 10:25:20 - INFO - codeparrot_training - Step 38245: {'lr': 0.00042958696080876136, 'samples': 19581952, 'steps': 38245, 'loss/train': 2.4667675495147705} +03/05/2022 10:25:23 - INFO - codeparrot_training - Step 38246: {'lr': 0.00042958326895060206, 'samples': 19582464, 'steps': 38246, 'loss/train': 1.4917136430740356} +03/05/2022 10:25:23 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 10:25:28 - INFO - codeparrot_training - Step 38247: {'lr': 0.0004295795770115254, 'samples': 19582976, 'steps': 38247, 'loss/train': 2.1322760581970215} +03/05/2022 10:25:31 - INFO - codeparrot_training - Step 38248: {'lr': 0.0004295758849915329, 'samples': 19583488, 'steps': 38248, 'loss/train': 1.540297508239746} +03/05/2022 10:25:31 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/05/2022 10:25:37 - INFO - codeparrot_training - Step 38249: {'lr': 0.00042957219289062635, 'samples': 19584000, 'steps': 38249, 'loss/train': 2.0867114067077637} +03/05/2022 10:25:39 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/05/2022 10:25:42 - INFO - codeparrot_training - Step 38250: {'lr': 0.0004295685007088072, 'samples': 19584512, 'steps': 38250, 'loss/train': 1.8317899703979492} +03/05/2022 10:25:45 - INFO - codeparrot_training - Step 38251: {'lr': 0.00042956480844607734, 'samples': 19585024, 'steps': 38251, 'loss/train': 1.069628357887268} +03/05/2022 10:25:48 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/05/2022 10:25:50 - INFO - codeparrot_training - Step 38252: {'lr': 0.00042956111610243833, 'samples': 19585536, 'steps': 38252, 'loss/train': 1.6579670906066895} +03/05/2022 10:25:54 - INFO - codeparrot_training - Step 38253: {'lr': 0.0004295574236778919, 'samples': 19586048, 'steps': 38253, 'loss/train': 1.2710665464401245} +03/05/2022 10:25:56 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 10:25:59 - INFO - codeparrot_training - Step 38254: {'lr': 0.00042955373117243954, 'samples': 19586560, 'steps': 38254, 'loss/train': 1.9285047054290771} +03/05/2022 10:26:02 - INFO - codeparrot_training - Step 38255: {'lr': 0.0004295500385860832, 'samples': 19587072, 'steps': 38255, 'loss/train': 2.205305337905884} +03/05/2022 10:26:04 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/05/2022 10:26:07 - INFO - codeparrot_training - Step 38256: {'lr': 0.0004295463459188243, 'samples': 19587584, 'steps': 38256, 'loss/train': 1.8881876468658447} +03/05/2022 10:26:10 - INFO - codeparrot_training - Step 38257: {'lr': 0.00042954265317066457, 'samples': 19588096, 'steps': 38257, 'loss/train': 1.8493458032608032} +03/05/2022 10:26:13 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 10:26:16 - INFO - codeparrot_training - Step 38258: {'lr': 0.0004295389603416057, 'samples': 19588608, 'steps': 38258, 'loss/train': 1.1372871398925781} +03/05/2022 10:26:19 - INFO - codeparrot_training - Step 38259: {'lr': 0.0004295352674316494, 'samples': 19589120, 'steps': 38259, 'loss/train': 2.3027684688568115} +03/05/2022 10:26:21 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/05/2022 10:26:24 - INFO - codeparrot_training - Step 38260: {'lr': 0.0004295315744407972, 'samples': 19589632, 'steps': 38260, 'loss/train': 1.7547498941421509} +03/05/2022 10:26:27 - INFO - codeparrot_training - Step 38261: {'lr': 0.0004295278813690509, 'samples': 19590144, 'steps': 38261, 'loss/train': 0.9920597672462463} +03/05/2022 10:26:30 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/05/2022 10:26:33 - INFO - codeparrot_training - Step 38262: {'lr': 0.0004295241882164121, 'samples': 19590656, 'steps': 38262, 'loss/train': 2.0222606658935547} +03/05/2022 10:26:36 - INFO - codeparrot_training - Step 38263: {'lr': 0.0004295204949828825, 'samples': 19591168, 'steps': 38263, 'loss/train': 0.5052517056465149} +03/05/2022 10:26:38 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 10:26:41 - INFO - codeparrot_training - Step 38264: {'lr': 0.0004295168016684636, 'samples': 19591680, 'steps': 38264, 'loss/train': 2.0428943634033203} +03/05/2022 10:26:44 - INFO - codeparrot_training - Step 38265: {'lr': 0.0004295131082731574, 'samples': 19592192, 'steps': 38265, 'loss/train': 2.049250841140747} +03/05/2022 10:26:47 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/05/2022 10:26:50 - INFO - codeparrot_training - Step 38266: {'lr': 0.0004295094147969652, 'samples': 19592704, 'steps': 38266, 'loss/train': 2.2261714935302734} +03/05/2022 10:26:53 - INFO - codeparrot_training - Step 38267: {'lr': 0.0004295057212398889, 'samples': 19593216, 'steps': 38267, 'loss/train': 2.0380611419677734} +03/05/2022 10:26:55 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/05/2022 10:26:58 - INFO - codeparrot_training - Step 38268: {'lr': 0.00042950202760193003, 'samples': 19593728, 'steps': 38268, 'loss/train': 1.4096884727478027} +03/05/2022 10:27:01 - INFO - codeparrot_training - Step 38269: {'lr': 0.0004294983338830904, 'samples': 19594240, 'steps': 38269, 'loss/train': 1.6578811407089233} +03/05/2022 10:27:04 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 10:27:06 - INFO - codeparrot_training - Step 38270: {'lr': 0.0004294946400833716, 'samples': 19594752, 'steps': 38270, 'loss/train': 1.9715553522109985} +03/05/2022 10:27:10 - INFO - codeparrot_training - Step 38271: {'lr': 0.0004294909462027752, 'samples': 19595264, 'steps': 38271, 'loss/train': 2.314692258834839} +03/05/2022 10:27:12 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/05/2022 10:27:15 - INFO - codeparrot_training - Step 38272: {'lr': 0.000429487252241303, 'samples': 19595776, 'steps': 38272, 'loss/train': 0.666504442691803} +03/05/2022 10:27:18 - INFO - codeparrot_training - Step 38273: {'lr': 0.00042948355819895655, 'samples': 19596288, 'steps': 38273, 'loss/train': 1.4557350873947144} +03/05/2022 10:27:21 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/05/2022 10:27:23 - INFO - codeparrot_training - Step 38274: {'lr': 0.0004294798640757377, 'samples': 19596800, 'steps': 38274, 'loss/train': 1.6731847524642944} +03/05/2022 10:27:27 - INFO - codeparrot_training - Step 38275: {'lr': 0.00042947616987164787, 'samples': 19597312, 'steps': 38275, 'loss/train': 0.686585545539856} +03/05/2022 10:27:29 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 10:27:32 - INFO - codeparrot_training - Step 38276: {'lr': 0.00042947247558668887, 'samples': 19597824, 'steps': 38276, 'loss/train': 2.279735803604126} +03/05/2022 10:27:35 - INFO - codeparrot_training - Step 38277: {'lr': 0.00042946878122086243, 'samples': 19598336, 'steps': 38277, 'loss/train': 1.514905333518982} +03/05/2022 10:27:37 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/05/2022 10:27:40 - INFO - codeparrot_training - Step 38278: {'lr': 0.00042946508677417007, 'samples': 19598848, 'steps': 38278, 'loss/train': 2.3469104766845703} +03/05/2022 10:27:43 - INFO - codeparrot_training - Step 38279: {'lr': 0.0004294613922466135, 'samples': 19599360, 'steps': 38279, 'loss/train': 2.026970386505127} +03/05/2022 10:27:46 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/05/2022 10:27:49 - INFO - codeparrot_training - Step 38280: {'lr': 0.0004294576976381944, 'samples': 19599872, 'steps': 38280, 'loss/train': 0.8599165678024292} +03/05/2022 10:27:52 - INFO - codeparrot_training - Step 38281: {'lr': 0.00042945400294891445, 'samples': 19600384, 'steps': 38281, 'loss/train': 2.026340961456299} +03/05/2022 10:27:54 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 10:27:57 - INFO - codeparrot_training - Step 38282: {'lr': 0.0004294503081787753, 'samples': 19600896, 'steps': 38282, 'loss/train': 2.039402723312378} +03/05/2022 10:28:00 - INFO - codeparrot_training - Step 38283: {'lr': 0.0004294466133277786, 'samples': 19601408, 'steps': 38283, 'loss/train': 1.7858246564865112} +03/05/2022 10:28:03 - INFO - codeparrot_training - Step 38284: {'lr': 0.00042944291839592597, 'samples': 19601920, 'steps': 38284, 'loss/train': 1.2764191627502441} +03/05/2022 10:28:03 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/05/2022 10:28:09 - INFO - codeparrot_training - Step 38285: {'lr': 0.0004294392233832192, 'samples': 19602432, 'steps': 38285, 'loss/train': 1.5788668394088745} +03/05/2022 10:28:12 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/05/2022 10:28:14 - INFO - codeparrot_training - Step 38286: {'lr': 0.0004294355282896599, 'samples': 19602944, 'steps': 38286, 'loss/train': 1.8752431869506836} +03/05/2022 10:28:17 - INFO - codeparrot_training - Step 38287: {'lr': 0.00042943183311524967, 'samples': 19603456, 'steps': 38287, 'loss/train': 1.8779851198196411} +03/05/2022 10:28:20 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 10:28:22 - INFO - codeparrot_training - Step 38288: {'lr': 0.0004294281378599902, 'samples': 19603968, 'steps': 38288, 'loss/train': 1.5274173021316528} +03/05/2022 10:28:26 - INFO - codeparrot_training - Step 38289: {'lr': 0.00042942444252388323, 'samples': 19604480, 'steps': 38289, 'loss/train': 1.2565371990203857} +03/05/2022 10:28:28 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 10:28:31 - INFO - codeparrot_training - Step 38290: {'lr': 0.0004294207471069304, 'samples': 19604992, 'steps': 38290, 'loss/train': 1.9365266561508179} +03/05/2022 10:28:34 - INFO - codeparrot_training - Step 38291: {'lr': 0.0004294170516091332, 'samples': 19605504, 'steps': 38291, 'loss/train': 1.2148891687393188} +03/05/2022 10:28:37 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 10:28:39 - INFO - codeparrot_training - Step 38292: {'lr': 0.0004294133560304936, 'samples': 19606016, 'steps': 38292, 'loss/train': 1.6216410398483276} +03/05/2022 10:28:43 - INFO - codeparrot_training - Step 38293: {'lr': 0.00042940966037101314, 'samples': 19606528, 'steps': 38293, 'loss/train': 2.0366060733795166} +03/05/2022 10:28:45 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 10:28:48 - INFO - codeparrot_training - Step 38294: {'lr': 0.00042940596463069336, 'samples': 19607040, 'steps': 38294, 'loss/train': 0.9560814499855042} +03/05/2022 10:28:51 - INFO - codeparrot_training - Step 38295: {'lr': 0.00042940226880953605, 'samples': 19607552, 'steps': 38295, 'loss/train': 2.7686333656311035} +03/05/2022 10:28:54 - INFO - codeparrot_training - Step 38296: {'lr': 0.0004293985729075428, 'samples': 19608064, 'steps': 38296, 'loss/train': 1.8770229816436768} +03/05/2022 10:28:54 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 10:29:00 - INFO - codeparrot_training - Step 38297: {'lr': 0.00042939487692471534, 'samples': 19608576, 'steps': 38297, 'loss/train': 1.306634783744812} +03/05/2022 10:29:03 - INFO - codeparrot_training - Step 38298: {'lr': 0.0004293911808610554, 'samples': 19609088, 'steps': 38298, 'loss/train': 1.466132402420044} +03/05/2022 10:29:03 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/05/2022 10:29:08 - INFO - codeparrot_training - Step 38299: {'lr': 0.0004293874847165645, 'samples': 19609600, 'steps': 38299, 'loss/train': 1.5298848152160645} +03/05/2022 10:29:11 - INFO - codeparrot_training - Skipping example with length 960 (seq_length=1024) +03/05/2022 10:29:13 - INFO - codeparrot_training - Step 38300: {'lr': 0.0004293837884912444, 'samples': 19610112, 'steps': 38300, 'loss/train': 2.3378098011016846} +03/05/2022 10:29:17 - INFO - codeparrot_training - Step 38301: {'lr': 0.00042938009218509667, 'samples': 19610624, 'steps': 38301, 'loss/train': 0.7045778036117554} +03/05/2022 10:29:20 - INFO - codeparrot_training - Step 38302: {'lr': 0.00042937639579812304, 'samples': 19611136, 'steps': 38302, 'loss/train': 1.4344048500061035} +03/05/2022 10:29:20 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 10:29:25 - INFO - codeparrot_training - Step 38303: {'lr': 0.0004293726993303252, 'samples': 19611648, 'steps': 38303, 'loss/train': 1.5720622539520264} +03/05/2022 10:29:28 - INFO - codeparrot_training - Step 38304: {'lr': 0.0004293690027817048, 'samples': 19612160, 'steps': 38304, 'loss/train': 1.8113373517990112} +03/05/2022 10:29:28 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/05/2022 10:29:34 - INFO - codeparrot_training - Step 38305: {'lr': 0.00042936530615226355, 'samples': 19612672, 'steps': 38305, 'loss/train': 1.8836528062820435} +03/05/2022 10:29:37 - INFO - codeparrot_training - Step 38306: {'lr': 0.00042936160944200295, 'samples': 19613184, 'steps': 38306, 'loss/train': 1.197272777557373} +03/05/2022 10:29:37 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 10:29:42 - INFO - codeparrot_training - Step 38307: {'lr': 0.00042935791265092483, 'samples': 19613696, 'steps': 38307, 'loss/train': 1.8337063789367676} +03/05/2022 10:29:45 - INFO - codeparrot_training - Step 38308: {'lr': 0.0004293542157790308, 'samples': 19614208, 'steps': 38308, 'loss/train': 1.720285177230835} +03/05/2022 10:29:45 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 10:29:51 - INFO - codeparrot_training - Step 38309: {'lr': 0.00042935051882632245, 'samples': 19614720, 'steps': 38309, 'loss/train': 1.3284389972686768} +03/05/2022 10:29:54 - INFO - codeparrot_training - Step 38310: {'lr': 0.0004293468217928017, 'samples': 19615232, 'steps': 38310, 'loss/train': 1.8689329624176025} +03/05/2022 10:29:54 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 10:29:59 - INFO - codeparrot_training - Step 38311: {'lr': 0.0004293431246784699, 'samples': 19615744, 'steps': 38311, 'loss/train': 1.2637889385223389} +03/05/2022 10:30:02 - INFO - codeparrot_training - Step 38312: {'lr': 0.0004293394274833289, 'samples': 19616256, 'steps': 38312, 'loss/train': 1.6548107862472534} +03/05/2022 10:30:03 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 10:30:07 - INFO - codeparrot_training - Step 38313: {'lr': 0.0004293357302073804, 'samples': 19616768, 'steps': 38313, 'loss/train': 1.5874419212341309} +03/05/2022 10:30:11 - INFO - codeparrot_training - Step 38314: {'lr': 0.00042933203285062585, 'samples': 19617280, 'steps': 38314, 'loss/train': 1.9115970134735107} +03/05/2022 10:30:11 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/05/2022 10:30:16 - INFO - codeparrot_training - Step 38315: {'lr': 0.00042932833541306704, 'samples': 19617792, 'steps': 38315, 'loss/train': 1.9426833391189575} +03/05/2022 10:30:19 - INFO - codeparrot_training - Step 38316: {'lr': 0.0004293246378947058, 'samples': 19618304, 'steps': 38316, 'loss/train': 1.7364038228988647} +03/05/2022 10:30:19 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 10:30:24 - INFO - codeparrot_training - Step 38317: {'lr': 0.00042932094029554354, 'samples': 19618816, 'steps': 38317, 'loss/train': 1.488792896270752} +03/05/2022 10:30:27 - INFO - codeparrot_training - Step 38318: {'lr': 0.00042931724261558205, 'samples': 19619328, 'steps': 38318, 'loss/train': 1.5331729650497437} +03/05/2022 10:30:27 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/05/2022 10:30:33 - INFO - codeparrot_training - Step 38319: {'lr': 0.000429313544854823, 'samples': 19619840, 'steps': 38319, 'loss/train': 1.2963359355926514} +03/05/2022 10:30:36 - INFO - codeparrot_training - Step 38320: {'lr': 0.00042930984701326796, 'samples': 19620352, 'steps': 38320, 'loss/train': 1.8010696172714233} +03/05/2022 10:30:36 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/05/2022 10:30:41 - INFO - codeparrot_training - Step 38321: {'lr': 0.0004293061490909187, 'samples': 19620864, 'steps': 38321, 'loss/train': 1.9893141984939575} +03/05/2022 10:30:44 - INFO - codeparrot_training - Step 38322: {'lr': 0.0004293024510877769, 'samples': 19621376, 'steps': 38322, 'loss/train': 1.6426589488983154} +03/05/2022 10:30:44 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/05/2022 10:30:50 - INFO - codeparrot_training - Step 38323: {'lr': 0.00042929875300384417, 'samples': 19621888, 'steps': 38323, 'loss/train': 0.11464940011501312} +03/05/2022 10:30:53 - INFO - codeparrot_training - Step 38324: {'lr': 0.0004292950548391222, 'samples': 19622400, 'steps': 38324, 'loss/train': 1.034578800201416} +03/05/2022 10:30:53 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 10:30:58 - INFO - codeparrot_training - Step 38325: {'lr': 0.00042929135659361265, 'samples': 19622912, 'steps': 38325, 'loss/train': 1.389309048652649} +03/05/2022 10:31:01 - INFO - codeparrot_training - Step 38326: {'lr': 0.0004292876582673171, 'samples': 19623424, 'steps': 38326, 'loss/train': 1.9555165767669678} +03/05/2022 10:31:01 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 10:31:06 - INFO - codeparrot_training - Step 38327: {'lr': 0.0004292839598602374, 'samples': 19623936, 'steps': 38327, 'loss/train': 1.489870548248291} +03/05/2022 10:31:09 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 10:31:12 - INFO - codeparrot_training - Step 38328: {'lr': 0.000429280261372375, 'samples': 19624448, 'steps': 38328, 'loss/train': 1.5003352165222168} +03/05/2022 10:31:15 - INFO - codeparrot_training - Step 38329: {'lr': 0.00042927656280373176, 'samples': 19624960, 'steps': 38329, 'loss/train': 6.536158561706543} +03/05/2022 10:31:18 - INFO - codeparrot_training - Step 38330: {'lr': 0.00042927286415430933, 'samples': 19625472, 'steps': 38330, 'loss/train': 1.9757002592086792} +03/05/2022 10:31:20 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/05/2022 10:31:23 - INFO - codeparrot_training - Step 38331: {'lr': 0.0004292691654241092, 'samples': 19625984, 'steps': 38331, 'loss/train': 2.4778330326080322} +03/05/2022 10:31:27 - INFO - codeparrot_training - Step 38332: {'lr': 0.00042926546661313313, 'samples': 19626496, 'steps': 38332, 'loss/train': 1.966078519821167} +03/05/2022 10:31:28 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/05/2022 10:31:32 - INFO - codeparrot_training - Step 38333: {'lr': 0.00042926176772138295, 'samples': 19627008, 'steps': 38333, 'loss/train': 4.749517440795898} +03/05/2022 10:31:35 - INFO - codeparrot_training - Step 38334: {'lr': 0.0004292580687488601, 'samples': 19627520, 'steps': 38334, 'loss/train': 1.9996683597564697} +03/05/2022 10:31:36 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/05/2022 10:31:40 - INFO - codeparrot_training - Step 38335: {'lr': 0.0004292543696955663, 'samples': 19628032, 'steps': 38335, 'loss/train': 2.3912599086761475} +03/05/2022 10:31:43 - INFO - codeparrot_training - Step 38336: {'lr': 0.00042925067056150324, 'samples': 19628544, 'steps': 38336, 'loss/train': 1.6615673303604126} +03/05/2022 10:31:45 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 10:31:49 - INFO - codeparrot_training - Step 38337: {'lr': 0.0004292469713466727, 'samples': 19629056, 'steps': 38337, 'loss/train': 1.4507231712341309} +03/05/2022 10:31:52 - INFO - codeparrot_training - Step 38338: {'lr': 0.00042924327205107616, 'samples': 19629568, 'steps': 38338, 'loss/train': 4.09354305267334} +03/05/2022 10:31:53 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/05/2022 10:31:57 - INFO - codeparrot_training - Step 38339: {'lr': 0.00042923957267471536, 'samples': 19630080, 'steps': 38339, 'loss/train': 2.3709399700164795} +03/05/2022 10:32:00 - INFO - codeparrot_training - Step 38340: {'lr': 0.000429235873217592, 'samples': 19630592, 'steps': 38340, 'loss/train': 1.7215328216552734} +03/05/2022 10:32:02 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/05/2022 10:32:06 - INFO - codeparrot_training - Step 38341: {'lr': 0.0004292321736797077, 'samples': 19631104, 'steps': 38341, 'loss/train': 2.362046957015991} +03/05/2022 10:32:09 - INFO - codeparrot_training - Step 38342: {'lr': 0.0004292284740610642, 'samples': 19631616, 'steps': 38342, 'loss/train': 1.0728541612625122} +03/05/2022 10:32:10 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/05/2022 10:32:14 - INFO - codeparrot_training - Step 38343: {'lr': 0.0004292247743616631, 'samples': 19632128, 'steps': 38343, 'loss/train': 1.2901664972305298} +03/05/2022 10:32:17 - INFO - codeparrot_training - Step 38344: {'lr': 0.00042922107458150604, 'samples': 19632640, 'steps': 38344, 'loss/train': 1.4820536375045776} +03/05/2022 10:32:19 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/05/2022 10:32:23 - INFO - codeparrot_training - Step 38345: {'lr': 0.00042921737472059474, 'samples': 19633152, 'steps': 38345, 'loss/train': 2.4663703441619873} +03/05/2022 10:32:26 - INFO - codeparrot_training - Step 38346: {'lr': 0.0004292136747789309, 'samples': 19633664, 'steps': 38346, 'loss/train': 2.121039628982544} +03/05/2022 10:32:27 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/05/2022 10:32:31 - INFO - codeparrot_training - Step 38347: {'lr': 0.00042920997475651607, 'samples': 19634176, 'steps': 38347, 'loss/train': 0.7485745549201965} +03/05/2022 10:32:34 - INFO - codeparrot_training - Step 38348: {'lr': 0.00042920627465335205, 'samples': 19634688, 'steps': 38348, 'loss/train': 2.2158560752868652} +03/05/2022 10:32:35 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/05/2022 10:32:39 - INFO - codeparrot_training - Step 38349: {'lr': 0.00042920257446944044, 'samples': 19635200, 'steps': 38349, 'loss/train': 2.546020030975342} +03/05/2022 10:32:43 - INFO - codeparrot_training - Step 38350: {'lr': 0.0004291988742047829, 'samples': 19635712, 'steps': 38350, 'loss/train': 1.5706251859664917} +03/05/2022 10:32:43 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/05/2022 10:32:48 - INFO - codeparrot_training - Step 38351: {'lr': 0.0004291951738593811, 'samples': 19636224, 'steps': 38351, 'loss/train': 1.5542936325073242} +03/05/2022 10:32:51 - INFO - codeparrot_training - Step 38352: {'lr': 0.0004291914734332367, 'samples': 19636736, 'steps': 38352, 'loss/train': 1.4142167568206787} +03/05/2022 10:32:52 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 10:32:56 - INFO - codeparrot_training - Step 38353: {'lr': 0.0004291877729263515, 'samples': 19637248, 'steps': 38353, 'loss/train': 1.2176845073699951} +03/05/2022 10:32:59 - INFO - codeparrot_training - Step 38354: {'lr': 0.0004291840723387269, 'samples': 19637760, 'steps': 38354, 'loss/train': 0.9256685376167297} +03/05/2022 10:33:00 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 10:33:05 - INFO - codeparrot_training - Step 38355: {'lr': 0.0004291803716703648, 'samples': 19638272, 'steps': 38355, 'loss/train': 2.225929021835327} +03/05/2022 10:33:08 - INFO - codeparrot_training - Step 38356: {'lr': 0.0004291766709212668, 'samples': 19638784, 'steps': 38356, 'loss/train': 1.197957158088684} +03/05/2022 10:33:09 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 10:33:13 - INFO - codeparrot_training - Step 38357: {'lr': 0.00042917297009143455, 'samples': 19639296, 'steps': 38357, 'loss/train': 2.3322348594665527} +03/05/2022 10:33:16 - INFO - codeparrot_training - Step 38358: {'lr': 0.00042916926918086973, 'samples': 19639808, 'steps': 38358, 'loss/train': 1.159559726715088} +03/05/2022 10:33:17 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 10:33:22 - INFO - codeparrot_training - Step 38359: {'lr': 0.000429165568189574, 'samples': 19640320, 'steps': 38359, 'loss/train': 2.478180408477783} +03/05/2022 10:33:25 - INFO - codeparrot_training - Step 38360: {'lr': 0.000429161867117549, 'samples': 19640832, 'steps': 38360, 'loss/train': 1.1967761516571045} +03/05/2022 10:33:26 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 10:33:30 - INFO - codeparrot_training - Step 38361: {'lr': 0.0004291581659647965, 'samples': 19641344, 'steps': 38361, 'loss/train': 1.6914615631103516} +03/05/2022 10:33:33 - INFO - codeparrot_training - Step 38362: {'lr': 0.00042915446473131805, 'samples': 19641856, 'steps': 38362, 'loss/train': 2.3513453006744385} +03/05/2022 10:33:34 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 10:33:39 - INFO - codeparrot_training - Step 38363: {'lr': 0.0004291507634171153, 'samples': 19642368, 'steps': 38363, 'loss/train': 2.2858219146728516} +03/05/2022 10:33:42 - INFO - codeparrot_training - Step 38364: {'lr': 0.0004291470620221901, 'samples': 19642880, 'steps': 38364, 'loss/train': 0.5959856510162354} +03/05/2022 10:33:42 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/05/2022 10:33:47 - INFO - codeparrot_training - Step 38365: {'lr': 0.0004291433605465439, 'samples': 19643392, 'steps': 38365, 'loss/train': 2.391291856765747} +03/05/2022 10:33:50 - INFO - codeparrot_training - Step 38366: {'lr': 0.00042913965899017855, 'samples': 19643904, 'steps': 38366, 'loss/train': 2.0616636276245117} +03/05/2022 10:33:51 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/05/2022 10:33:55 - INFO - codeparrot_training - Step 38367: {'lr': 0.0004291359573530956, 'samples': 19644416, 'steps': 38367, 'loss/train': 1.4300143718719482} +03/05/2022 10:33:59 - INFO - codeparrot_training - Step 38368: {'lr': 0.0004291322556352967, 'samples': 19644928, 'steps': 38368, 'loss/train': 1.7643640041351318} +03/05/2022 10:34:00 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/05/2022 10:34:04 - INFO - codeparrot_training - Step 38369: {'lr': 0.00042912855383678365, 'samples': 19645440, 'steps': 38369, 'loss/train': 1.811132788658142} +03/05/2022 10:34:07 - INFO - codeparrot_training - Step 38370: {'lr': 0.000429124851957558, 'samples': 19645952, 'steps': 38370, 'loss/train': 2.6342947483062744} +03/05/2022 10:34:08 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 10:34:12 - INFO - codeparrot_training - Step 38371: {'lr': 0.0004291211499976214, 'samples': 19646464, 'steps': 38371, 'loss/train': 0.6090827584266663} +03/05/2022 10:34:15 - INFO - codeparrot_training - Step 38372: {'lr': 0.0004291174479569757, 'samples': 19646976, 'steps': 38372, 'loss/train': 2.2093231678009033} +03/05/2022 10:34:16 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 10:34:21 - INFO - codeparrot_training - Step 38373: {'lr': 0.00042911374583562233, 'samples': 19647488, 'steps': 38373, 'loss/train': 1.0194138288497925} +03/05/2022 10:34:24 - INFO - codeparrot_training - Step 38374: {'lr': 0.0004291100436335631, 'samples': 19648000, 'steps': 38374, 'loss/train': 1.4219602346420288} +03/05/2022 10:34:25 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/05/2022 10:34:29 - INFO - codeparrot_training - Step 38375: {'lr': 0.00042910634135079963, 'samples': 19648512, 'steps': 38375, 'loss/train': 0.8629974722862244} +03/05/2022 10:34:32 - INFO - codeparrot_training - Step 38376: {'lr': 0.00042910263898733364, 'samples': 19649024, 'steps': 38376, 'loss/train': 1.8268239498138428} +03/05/2022 10:34:33 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 10:34:38 - INFO - codeparrot_training - Step 38377: {'lr': 0.0004290989365431668, 'samples': 19649536, 'steps': 38377, 'loss/train': 1.638385534286499} +03/05/2022 10:34:41 - INFO - codeparrot_training - Step 38378: {'lr': 0.0004290952340183007, 'samples': 19650048, 'steps': 38378, 'loss/train': 1.6871306896209717} +03/05/2022 10:34:41 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/05/2022 10:34:46 - INFO - codeparrot_training - Step 38379: {'lr': 0.00042909153141273705, 'samples': 19650560, 'steps': 38379, 'loss/train': 1.151309847831726} +03/05/2022 10:34:49 - INFO - codeparrot_training - Step 38380: {'lr': 0.0004290878287264775, 'samples': 19651072, 'steps': 38380, 'loss/train': 1.0523532629013062} +03/05/2022 10:34:50 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/05/2022 10:34:55 - INFO - codeparrot_training - Step 38381: {'lr': 0.0004290841259595237, 'samples': 19651584, 'steps': 38381, 'loss/train': 1.8185559511184692} +03/05/2022 10:34:58 - INFO - codeparrot_training - Step 38382: {'lr': 0.00042908042311187744, 'samples': 19652096, 'steps': 38382, 'loss/train': 0.4950217306613922} +03/05/2022 10:34:58 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 10:35:03 - INFO - codeparrot_training - Step 38383: {'lr': 0.00042907672018354027, 'samples': 19652608, 'steps': 38383, 'loss/train': 0.7234829068183899} +03/05/2022 10:35:06 - INFO - codeparrot_training - Step 38384: {'lr': 0.00042907301717451396, 'samples': 19653120, 'steps': 38384, 'loss/train': 1.62031888961792} +03/05/2022 10:35:07 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 10:35:11 - INFO - codeparrot_training - Step 38385: {'lr': 0.0004290693140848, 'samples': 19653632, 'steps': 38385, 'loss/train': 1.875291347503662} +03/05/2022 10:35:15 - INFO - codeparrot_training - Step 38386: {'lr': 0.0004290656109144003, 'samples': 19654144, 'steps': 38386, 'loss/train': 1.7963448762893677} +03/05/2022 10:35:15 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/05/2022 10:35:20 - INFO - codeparrot_training - Step 38387: {'lr': 0.0004290619076633163, 'samples': 19654656, 'steps': 38387, 'loss/train': 1.1610013246536255} +03/05/2022 10:35:23 - INFO - codeparrot_training - Step 38388: {'lr': 0.0004290582043315498, 'samples': 19655168, 'steps': 38388, 'loss/train': 1.3825302124023438} +03/05/2022 10:35:23 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/05/2022 10:35:28 - INFO - codeparrot_training - Step 38389: {'lr': 0.0004290545009191024, 'samples': 19655680, 'steps': 38389, 'loss/train': 1.243184208869934} +03/05/2022 10:35:32 - INFO - codeparrot_training - Step 38390: {'lr': 0.0004290507974259759, 'samples': 19656192, 'steps': 38390, 'loss/train': 2.1244990825653076} +03/05/2022 10:35:32 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 10:35:37 - INFO - codeparrot_training - Step 38391: {'lr': 0.0004290470938521718, 'samples': 19656704, 'steps': 38391, 'loss/train': 1.3065741062164307} +03/05/2022 10:35:40 - INFO - codeparrot_training - Step 38392: {'lr': 0.0004290433901976918, 'samples': 19657216, 'steps': 38392, 'loss/train': 1.8737469911575317} +03/05/2022 10:35:40 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/05/2022 10:35:46 - INFO - codeparrot_training - Step 38393: {'lr': 0.0004290396864625377, 'samples': 19657728, 'steps': 38393, 'loss/train': 1.0331001281738281} +03/05/2022 10:35:49 - INFO - codeparrot_training - Step 38394: {'lr': 0.000429035982646711, 'samples': 19658240, 'steps': 38394, 'loss/train': 1.5800210237503052} +03/05/2022 10:35:49 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/05/2022 10:35:54 - INFO - codeparrot_training - Step 38395: {'lr': 0.0004290322787502135, 'samples': 19658752, 'steps': 38395, 'loss/train': 2.151383638381958} +03/05/2022 10:35:57 - INFO - codeparrot_training - Step 38396: {'lr': 0.0004290285747730468, 'samples': 19659264, 'steps': 38396, 'loss/train': 1.99385666847229} +03/05/2022 10:35:57 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/05/2022 10:36:03 - INFO - codeparrot_training - Step 38397: {'lr': 0.00042902487071521257, 'samples': 19659776, 'steps': 38397, 'loss/train': 1.6579667329788208} +03/05/2022 10:36:06 - INFO - codeparrot_training - Step 38398: {'lr': 0.0004290211665767125, 'samples': 19660288, 'steps': 38398, 'loss/train': 1.331533670425415} +03/05/2022 10:36:06 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 10:36:11 - INFO - codeparrot_training - Step 38399: {'lr': 0.00042901746235754837, 'samples': 19660800, 'steps': 38399, 'loss/train': 2.3798375129699707} +03/05/2022 10:36:14 - INFO - codeparrot_training - Step 38400: {'lr': 0.0004290137580577216, 'samples': 19661312, 'steps': 38400, 'loss/train': 2.1654205322265625} +03/05/2022 10:36:14 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/05/2022 10:36:20 - INFO - codeparrot_training - Step 38401: {'lr': 0.000429010053677234, 'samples': 19661824, 'steps': 38401, 'loss/train': 1.4910234212875366} +03/05/2022 10:36:23 - INFO - codeparrot_training - Step 38402: {'lr': 0.00042900634921608726, 'samples': 19662336, 'steps': 38402, 'loss/train': 1.06117582321167} +03/05/2022 10:36:23 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/05/2022 10:36:28 - INFO - codeparrot_training - Step 38403: {'lr': 0.0004290026446742831, 'samples': 19662848, 'steps': 38403, 'loss/train': 1.4278509616851807} +03/05/2022 10:36:31 - INFO - codeparrot_training - Step 38404: {'lr': 0.00042899894005182294, 'samples': 19663360, 'steps': 38404, 'loss/train': 1.6785449981689453} +03/05/2022 10:36:31 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/05/2022 10:36:37 - INFO - codeparrot_training - Step 38405: {'lr': 0.0004289952353487088, 'samples': 19663872, 'steps': 38405, 'loss/train': 1.9514307975769043} +03/05/2022 10:36:40 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/05/2022 10:36:42 - INFO - codeparrot_training - Step 38406: {'lr': 0.000428991530564942, 'samples': 19664384, 'steps': 38406, 'loss/train': 1.7903854846954346} +03/05/2022 10:36:45 - INFO - codeparrot_training - Step 38407: {'lr': 0.00042898782570052453, 'samples': 19664896, 'steps': 38407, 'loss/train': 1.4764090776443481} +03/05/2022 10:36:48 - INFO - codeparrot_training - Step 38408: {'lr': 0.0004289841207554578, 'samples': 19665408, 'steps': 38408, 'loss/train': 2.009769916534424} +03/05/2022 10:36:48 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 10:36:54 - INFO - codeparrot_training - Step 38409: {'lr': 0.00042898041572974363, 'samples': 19665920, 'steps': 38409, 'loss/train': 1.007509708404541} +03/05/2022 10:36:57 - INFO - codeparrot_training - Step 38410: {'lr': 0.0004289767106233836, 'samples': 19666432, 'steps': 38410, 'loss/train': 2.185873508453369} +03/05/2022 10:36:57 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 10:37:02 - INFO - codeparrot_training - Step 38411: {'lr': 0.0004289730054363795, 'samples': 19666944, 'steps': 38411, 'loss/train': 1.8891595602035522} +03/05/2022 10:37:05 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/05/2022 10:37:07 - INFO - codeparrot_training - Step 38412: {'lr': 0.00042896930016873293, 'samples': 19667456, 'steps': 38412, 'loss/train': 1.914839267730713} +03/05/2022 10:37:11 - INFO - codeparrot_training - Step 38413: {'lr': 0.0004289655948204455, 'samples': 19667968, 'steps': 38413, 'loss/train': 1.4405065774917603} +03/05/2022 10:37:13 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/05/2022 10:37:16 - INFO - codeparrot_training - Step 38414: {'lr': 0.00042896188939151893, 'samples': 19668480, 'steps': 38414, 'loss/train': 2.356462001800537} +03/05/2022 10:37:19 - INFO - codeparrot_training - Step 38415: {'lr': 0.00042895818388195497, 'samples': 19668992, 'steps': 38415, 'loss/train': 1.7206308841705322} +03/05/2022 10:37:22 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 10:37:24 - INFO - codeparrot_training - Step 38416: {'lr': 0.00042895447829175516, 'samples': 19669504, 'steps': 38416, 'loss/train': 1.2121211290359497} +03/05/2022 10:37:27 - INFO - codeparrot_training - Step 38417: {'lr': 0.00042895077262092117, 'samples': 19670016, 'steps': 38417, 'loss/train': 1.2586681842803955} +03/05/2022 10:37:33 - INFO - codeparrot_training - Step 38418: {'lr': 0.00042894706686945485, 'samples': 19670528, 'steps': 38418, 'loss/train': 1.6650660037994385} +03/05/2022 10:37:36 - INFO - codeparrot_training - Step 38419: {'lr': 0.00042894336103735766, 'samples': 19671040, 'steps': 38419, 'loss/train': 1.1475378274917603} +03/05/2022 10:37:39 - INFO - codeparrot_training - Step 38420: {'lr': 0.0004289396551246313, 'samples': 19671552, 'steps': 38420, 'loss/train': 1.2205075025558472} +03/05/2022 10:37:40 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 10:37:44 - INFO - codeparrot_training - Step 38421: {'lr': 0.0004289359491312776, 'samples': 19672064, 'steps': 38421, 'loss/train': 1.6022918224334717} +03/05/2022 10:37:48 - INFO - codeparrot_training - Step 38422: {'lr': 0.00042893224305729806, 'samples': 19672576, 'steps': 38422, 'loss/train': 0.9790200591087341} +03/05/2022 10:37:48 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 10:37:53 - INFO - codeparrot_training - Step 38423: {'lr': 0.0004289285369026944, 'samples': 19673088, 'steps': 38423, 'loss/train': 2.402050018310547} +03/05/2022 10:37:56 - INFO - codeparrot_training - Step 38424: {'lr': 0.00042892483066746836, 'samples': 19673600, 'steps': 38424, 'loss/train': 0.5828243494033813} +03/05/2022 10:37:56 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 10:38:01 - INFO - codeparrot_training - Step 38425: {'lr': 0.0004289211243516216, 'samples': 19674112, 'steps': 38425, 'loss/train': 1.963414192199707} +03/05/2022 10:38:05 - INFO - codeparrot_training - Step 38426: {'lr': 0.0004289174179551556, 'samples': 19674624, 'steps': 38426, 'loss/train': 1.612033724784851} +03/05/2022 10:38:05 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/05/2022 10:38:10 - INFO - codeparrot_training - Step 38427: {'lr': 0.0004289137114780722, 'samples': 19675136, 'steps': 38427, 'loss/train': 1.9628162384033203} +03/05/2022 10:38:13 - INFO - codeparrot_training - Step 38428: {'lr': 0.00042891000492037315, 'samples': 19675648, 'steps': 38428, 'loss/train': 1.6535027027130127} +03/05/2022 10:38:13 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/05/2022 10:38:18 - INFO - codeparrot_training - Step 38429: {'lr': 0.00042890629828205997, 'samples': 19676160, 'steps': 38429, 'loss/train': 0.8594965934753418} +03/05/2022 10:38:21 - INFO - codeparrot_training - Step 38430: {'lr': 0.0004289025915631343, 'samples': 19676672, 'steps': 38430, 'loss/train': 2.3969826698303223} +03/05/2022 10:38:22 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 10:38:27 - INFO - codeparrot_training - Step 38431: {'lr': 0.00042889888476359793, 'samples': 19677184, 'steps': 38431, 'loss/train': 6.413302421569824} +03/05/2022 10:38:30 - INFO - codeparrot_training - Step 38432: {'lr': 0.0004288951778834525, 'samples': 19677696, 'steps': 38432, 'loss/train': 1.6811875104904175} +03/05/2022 10:38:31 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/05/2022 10:38:35 - INFO - codeparrot_training - Step 38433: {'lr': 0.00042889147092269964, 'samples': 19678208, 'steps': 38433, 'loss/train': 0.9800106287002563} +03/05/2022 10:38:39 - INFO - codeparrot_training - Step 38434: {'lr': 0.0004288877638813411, 'samples': 19678720, 'steps': 38434, 'loss/train': 1.6396557092666626} +03/05/2022 10:38:39 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 10:38:44 - INFO - codeparrot_training - Step 38435: {'lr': 0.00042888405675937843, 'samples': 19679232, 'steps': 38435, 'loss/train': 1.7083581686019897} +03/05/2022 10:38:47 - INFO - codeparrot_training - Step 38436: {'lr': 0.00042888034955681337, 'samples': 19679744, 'steps': 38436, 'loss/train': 2.1615352630615234} +03/05/2022 10:38:48 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 10:38:52 - INFO - codeparrot_training - Step 38437: {'lr': 0.0004288766422736476, 'samples': 19680256, 'steps': 38437, 'loss/train': 1.7751213312149048} +03/05/2022 10:38:56 - INFO - codeparrot_training - Step 38438: {'lr': 0.00042887293490988276, 'samples': 19680768, 'steps': 38438, 'loss/train': 1.7572563886642456} +03/05/2022 10:38:56 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 10:39:01 - INFO - codeparrot_training - Step 38439: {'lr': 0.00042886922746552056, 'samples': 19681280, 'steps': 38439, 'loss/train': 2.136826276779175} +03/05/2022 10:39:04 - INFO - codeparrot_training - Step 38440: {'lr': 0.0004288655199405626, 'samples': 19681792, 'steps': 38440, 'loss/train': 1.478708267211914} +03/05/2022 10:39:04 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 10:39:09 - INFO - codeparrot_training - Step 38441: {'lr': 0.00042886181233501067, 'samples': 19682304, 'steps': 38441, 'loss/train': 2.2431581020355225} +03/05/2022 10:39:12 - INFO - codeparrot_training - Step 38442: {'lr': 0.00042885810464886635, 'samples': 19682816, 'steps': 38442, 'loss/train': 2.1431119441986084} +03/05/2022 10:39:13 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 10:39:18 - INFO - codeparrot_training - Step 38443: {'lr': 0.0004288543968821312, 'samples': 19683328, 'steps': 38443, 'loss/train': 1.6908425092697144} +03/05/2022 10:39:21 - INFO - codeparrot_training - Step 38444: {'lr': 0.00042885068903480717, 'samples': 19683840, 'steps': 38444, 'loss/train': 0.6604570150375366} +03/05/2022 10:39:21 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/05/2022 10:39:26 - INFO - codeparrot_training - Step 38445: {'lr': 0.00042884698110689574, 'samples': 19684352, 'steps': 38445, 'loss/train': 0.9049607515335083} +03/05/2022 10:39:29 - INFO - codeparrot_training - Step 38446: {'lr': 0.00042884327309839865, 'samples': 19684864, 'steps': 38446, 'loss/train': 1.7797797918319702} +03/05/2022 10:39:29 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/05/2022 10:39:34 - INFO - codeparrot_training - Step 38447: {'lr': 0.0004288395650093174, 'samples': 19685376, 'steps': 38447, 'loss/train': 1.7316081523895264} +03/05/2022 10:39:37 - INFO - codeparrot_training - Step 38448: {'lr': 0.000428835856839654, 'samples': 19685888, 'steps': 38448, 'loss/train': 1.2837438583374023} +03/05/2022 10:39:38 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 10:39:43 - INFO - codeparrot_training - Step 38449: {'lr': 0.0004288321485894098, 'samples': 19686400, 'steps': 38449, 'loss/train': 0.8910672068595886} +03/05/2022 10:39:46 - INFO - codeparrot_training - Step 38450: {'lr': 0.0004288284402585866, 'samples': 19686912, 'steps': 38450, 'loss/train': 3.5198237895965576} +03/05/2022 10:39:46 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/05/2022 10:39:51 - INFO - codeparrot_training - Step 38451: {'lr': 0.0004288247318471861, 'samples': 19687424, 'steps': 38451, 'loss/train': 1.235252022743225} +03/05/2022 10:39:54 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/05/2022 10:39:56 - INFO - codeparrot_training - Step 38452: {'lr': 0.0004288210233552099, 'samples': 19687936, 'steps': 38452, 'loss/train': 1.6333867311477661} +03/05/2022 10:40:00 - INFO - codeparrot_training - Step 38453: {'lr': 0.00042881731478265975, 'samples': 19688448, 'steps': 38453, 'loss/train': 1.8736987113952637} +03/05/2022 10:40:02 - INFO - codeparrot_training - Skipping example with length 920 (seq_length=1024) +03/05/2022 10:40:05 - INFO - codeparrot_training - Step 38454: {'lr': 0.00042881360612953724, 'samples': 19688960, 'steps': 38454, 'loss/train': 1.6093828678131104} +03/05/2022 10:40:08 - INFO - codeparrot_training - Step 38455: {'lr': 0.0004288098973958441, 'samples': 19689472, 'steps': 38455, 'loss/train': 1.8442496061325073} +03/05/2022 10:40:11 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 10:40:13 - INFO - codeparrot_training - Step 38456: {'lr': 0.000428806188581582, 'samples': 19689984, 'steps': 38456, 'loss/train': 2.321953773498535} +03/05/2022 10:40:17 - INFO - codeparrot_training - Step 38457: {'lr': 0.00042880247968675255, 'samples': 19690496, 'steps': 38457, 'loss/train': 2.0819449424743652} +03/05/2022 10:40:19 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/05/2022 10:40:22 - INFO - codeparrot_training - Step 38458: {'lr': 0.00042879877071135746, 'samples': 19691008, 'steps': 38458, 'loss/train': 2.7108960151672363} +03/05/2022 10:40:25 - INFO - codeparrot_training - Step 38459: {'lr': 0.0004287950616553984, 'samples': 19691520, 'steps': 38459, 'loss/train': 2.0531153678894043} +03/05/2022 10:40:28 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/05/2022 10:40:30 - INFO - codeparrot_training - Step 38460: {'lr': 0.0004287913525188771, 'samples': 19692032, 'steps': 38460, 'loss/train': 2.273984432220459} +03/05/2022 10:40:34 - INFO - codeparrot_training - Step 38461: {'lr': 0.0004287876433017951, 'samples': 19692544, 'steps': 38461, 'loss/train': 2.0924222469329834} +03/05/2022 10:40:36 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 10:40:39 - INFO - codeparrot_training - Step 38462: {'lr': 0.0004287839340041542, 'samples': 19693056, 'steps': 38462, 'loss/train': 1.4327436685562134} +03/05/2022 10:40:42 - INFO - codeparrot_training - Step 38463: {'lr': 0.000428780224625956, 'samples': 19693568, 'steps': 38463, 'loss/train': 1.6557176113128662} +03/05/2022 10:40:45 - INFO - codeparrot_training - Step 38464: {'lr': 0.00042877651516720215, 'samples': 19694080, 'steps': 38464, 'loss/train': 1.746933937072754} +03/05/2022 10:40:46 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/05/2022 10:40:51 - INFO - codeparrot_training - Step 38465: {'lr': 0.0004287728056278944, 'samples': 19694592, 'steps': 38465, 'loss/train': 1.970833659172058} +03/05/2022 10:40:54 - INFO - codeparrot_training - Step 38466: {'lr': 0.00042876909600803444, 'samples': 19695104, 'steps': 38466, 'loss/train': 0.1481592208147049} +03/05/2022 10:40:54 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/05/2022 10:40:59 - INFO - codeparrot_training - Step 38467: {'lr': 0.00042876538630762386, 'samples': 19695616, 'steps': 38467, 'loss/train': 2.0764636993408203} +03/05/2022 10:41:02 - INFO - codeparrot_training - Step 38468: {'lr': 0.00042876167652666433, 'samples': 19696128, 'steps': 38468, 'loss/train': 2.1492557525634766} +03/05/2022 10:41:03 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/05/2022 10:41:08 - INFO - codeparrot_training - Step 38469: {'lr': 0.0004287579666651575, 'samples': 19696640, 'steps': 38469, 'loss/train': 2.0843088626861572} +03/05/2022 10:41:11 - INFO - codeparrot_training - Step 38470: {'lr': 0.00042875425672310506, 'samples': 19697152, 'steps': 38470, 'loss/train': 1.6615303754806519} +03/05/2022 10:41:11 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 10:41:16 - INFO - codeparrot_training - Step 38471: {'lr': 0.00042875054670050885, 'samples': 19697664, 'steps': 38471, 'loss/train': 0.22629989683628082} +03/05/2022 10:41:19 - INFO - codeparrot_training - Step 38472: {'lr': 0.00042874683659737035, 'samples': 19698176, 'steps': 38472, 'loss/train': 1.711127519607544} +03/05/2022 10:41:20 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/05/2022 10:41:25 - INFO - codeparrot_training - Step 38473: {'lr': 0.0004287431264136913, 'samples': 19698688, 'steps': 38473, 'loss/train': 1.490905523300171} +03/05/2022 10:41:28 - INFO - codeparrot_training - Step 38474: {'lr': 0.0004287394161494733, 'samples': 19699200, 'steps': 38474, 'loss/train': 0.2891647219657898} +03/05/2022 10:41:28 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/05/2022 10:41:33 - INFO - codeparrot_training - Step 38475: {'lr': 0.0004287357058047181, 'samples': 19699712, 'steps': 38475, 'loss/train': 2.259152412414551} +03/05/2022 10:41:36 - INFO - codeparrot_training - Step 38476: {'lr': 0.00042873199537942733, 'samples': 19700224, 'steps': 38476, 'loss/train': 1.9291691780090332} +03/05/2022 10:41:37 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 10:41:42 - INFO - codeparrot_training - Step 38477: {'lr': 0.0004287282848736027, 'samples': 19700736, 'steps': 38477, 'loss/train': 1.4054182767868042} +03/05/2022 10:41:45 - INFO - codeparrot_training - Step 38478: {'lr': 0.00042872457428724586, 'samples': 19701248, 'steps': 38478, 'loss/train': 1.7675607204437256} +03/05/2022 10:41:45 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/05/2022 10:41:50 - INFO - codeparrot_training - Step 38479: {'lr': 0.00042872086362035844, 'samples': 19701760, 'steps': 38479, 'loss/train': 1.40352201461792} +03/05/2022 10:41:53 - INFO - codeparrot_training - Step 38480: {'lr': 0.00042871715287294223, 'samples': 19702272, 'steps': 38480, 'loss/train': 1.5122416019439697} +03/05/2022 10:41:54 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 10:41:58 - INFO - codeparrot_training - Step 38481: {'lr': 0.00042871344204499886, 'samples': 19702784, 'steps': 38481, 'loss/train': 1.637404441833496} +03/05/2022 10:42:02 - INFO - codeparrot_training - Step 38482: {'lr': 0.0004287097311365299, 'samples': 19703296, 'steps': 38482, 'loss/train': 1.4677963256835938} +03/05/2022 10:42:02 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/05/2022 10:42:07 - INFO - codeparrot_training - Step 38483: {'lr': 0.00042870602014753707, 'samples': 19703808, 'steps': 38483, 'loss/train': 1.6970757246017456} +03/05/2022 10:42:10 - INFO - codeparrot_training - Step 38484: {'lr': 0.0004287023090780221, 'samples': 19704320, 'steps': 38484, 'loss/train': 1.9356173276901245} +03/05/2022 10:42:11 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 10:42:15 - INFO - codeparrot_training - Step 38485: {'lr': 0.0004286985979279866, 'samples': 19704832, 'steps': 38485, 'loss/train': 2.162578582763672} +03/05/2022 10:42:18 - INFO - codeparrot_training - Step 38486: {'lr': 0.0004286948866974323, 'samples': 19705344, 'steps': 38486, 'loss/train': 1.7900238037109375} +03/05/2022 10:42:19 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 10:42:24 - INFO - codeparrot_training - Step 38487: {'lr': 0.0004286911753863608, 'samples': 19705856, 'steps': 38487, 'loss/train': 1.3912684917449951} +03/05/2022 10:42:27 - INFO - codeparrot_training - Step 38488: {'lr': 0.0004286874639947739, 'samples': 19706368, 'steps': 38488, 'loss/train': 1.6374905109405518} +03/05/2022 10:42:27 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/05/2022 10:42:32 - INFO - codeparrot_training - Step 38489: {'lr': 0.0004286837525226731, 'samples': 19706880, 'steps': 38489, 'loss/train': 1.323987364768982} +03/05/2022 10:42:35 - INFO - codeparrot_training - Step 38490: {'lr': 0.0004286800409700602, 'samples': 19707392, 'steps': 38490, 'loss/train': 1.8008346557617188} +03/05/2022 10:42:36 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/05/2022 10:42:41 - INFO - codeparrot_training - Step 38491: {'lr': 0.0004286763293369369, 'samples': 19707904, 'steps': 38491, 'loss/train': 2.441908836364746} +03/05/2022 10:42:44 - INFO - codeparrot_training - Step 38492: {'lr': 0.00042867261762330466, 'samples': 19708416, 'steps': 38492, 'loss/train': 1.6079301834106445} +03/05/2022 10:42:44 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/05/2022 10:42:49 - INFO - codeparrot_training - Step 38493: {'lr': 0.0004286689058291654, 'samples': 19708928, 'steps': 38493, 'loss/train': 1.8984520435333252} +03/05/2022 10:42:52 - INFO - codeparrot_training - Step 38494: {'lr': 0.00042866519395452063, 'samples': 19709440, 'steps': 38494, 'loss/train': 0.376102089881897} +03/05/2022 10:42:52 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/05/2022 10:42:57 - INFO - codeparrot_training - Step 38495: {'lr': 0.00042866148199937216, 'samples': 19709952, 'steps': 38495, 'loss/train': 1.2295597791671753} +03/05/2022 10:43:01 - INFO - codeparrot_training - Step 38496: {'lr': 0.00042865776996372146, 'samples': 19710464, 'steps': 38496, 'loss/train': 1.580771565437317} +03/05/2022 10:43:01 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 10:43:06 - INFO - codeparrot_training - Step 38497: {'lr': 0.00042865405784757037, 'samples': 19710976, 'steps': 38497, 'loss/train': 2.090898036956787} +03/05/2022 10:43:09 - INFO - codeparrot_training - Step 38498: {'lr': 0.0004286503456509206, 'samples': 19711488, 'steps': 38498, 'loss/train': 1.4580153226852417} +03/05/2022 10:43:09 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/05/2022 10:43:14 - INFO - codeparrot_training - Step 38499: {'lr': 0.0004286466333737737, 'samples': 19712000, 'steps': 38499, 'loss/train': 1.817798137664795} +03/05/2022 10:43:17 - INFO - codeparrot_training - Step 38500: {'lr': 0.00042864292101613133, 'samples': 19712512, 'steps': 38500, 'loss/train': 1.8921886682510376} +03/05/2022 10:43:18 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/05/2022 10:43:23 - INFO - codeparrot_training - Step 38501: {'lr': 0.0004286392085779953, 'samples': 19713024, 'steps': 38501, 'loss/train': 2.1208791732788086} +03/05/2022 10:43:26 - INFO - codeparrot_training - Step 38502: {'lr': 0.00042863549605936716, 'samples': 19713536, 'steps': 38502, 'loss/train': 1.3319653272628784} +03/05/2022 10:43:26 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/05/2022 10:43:31 - INFO - codeparrot_training - Step 38503: {'lr': 0.00042863178346024856, 'samples': 19714048, 'steps': 38503, 'loss/train': 1.7979694604873657} +03/05/2022 10:43:34 - INFO - codeparrot_training - Step 38504: {'lr': 0.00042862807078064124, 'samples': 19714560, 'steps': 38504, 'loss/train': 1.5656987428665161} +03/05/2022 10:43:35 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/05/2022 10:43:40 - INFO - codeparrot_training - Step 38505: {'lr': 0.00042862435802054703, 'samples': 19715072, 'steps': 38505, 'loss/train': 1.709275245666504} +03/05/2022 10:43:43 - INFO - codeparrot_training - Step 38506: {'lr': 0.00042862064517996723, 'samples': 19715584, 'steps': 38506, 'loss/train': 2.639859199523926} +03/05/2022 10:43:44 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 10:43:48 - INFO - codeparrot_training - Step 38507: {'lr': 0.00042861693225890385, 'samples': 19716096, 'steps': 38507, 'loss/train': 1.8164652585983276} +03/05/2022 10:43:51 - INFO - codeparrot_training - Step 38508: {'lr': 0.0004286132192573584, 'samples': 19716608, 'steps': 38508, 'loss/train': 0.7617395520210266} +03/05/2022 10:43:52 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/05/2022 10:43:56 - INFO - codeparrot_training - Step 38509: {'lr': 0.0004286095061753326, 'samples': 19717120, 'steps': 38509, 'loss/train': 0.7710505127906799} +03/05/2022 10:44:00 - INFO - codeparrot_training - Step 38510: {'lr': 0.0004286057930128281, 'samples': 19717632, 'steps': 38510, 'loss/train': 1.682903528213501} +03/05/2022 10:44:00 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/05/2022 10:44:05 - INFO - codeparrot_training - Step 38511: {'lr': 0.00042860207976984664, 'samples': 19718144, 'steps': 38511, 'loss/train': 0.6496201753616333} +03/05/2022 10:44:08 - INFO - codeparrot_training - Step 38512: {'lr': 0.00042859836644638976, 'samples': 19718656, 'steps': 38512, 'loss/train': 2.3134231567382812} +03/05/2022 10:44:08 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/05/2022 10:44:13 - INFO - codeparrot_training - Step 38513: {'lr': 0.00042859465304245927, 'samples': 19719168, 'steps': 38513, 'loss/train': 1.6611865758895874} +03/05/2022 10:44:16 - INFO - codeparrot_training - Step 38514: {'lr': 0.00042859093955805675, 'samples': 19719680, 'steps': 38514, 'loss/train': 2.065661907196045} +03/05/2022 10:44:17 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/05/2022 10:44:22 - INFO - codeparrot_training - Step 38515: {'lr': 0.0004285872259931839, 'samples': 19720192, 'steps': 38515, 'loss/train': 2.535715103149414} +03/05/2022 10:44:25 - INFO - codeparrot_training - Step 38516: {'lr': 0.00042858351234784244, 'samples': 19720704, 'steps': 38516, 'loss/train': 1.2711677551269531} +03/05/2022 10:44:26 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/05/2022 10:44:30 - INFO - codeparrot_training - Step 38517: {'lr': 0.000428579798622034, 'samples': 19721216, 'steps': 38517, 'loss/train': 1.7173043489456177} +03/05/2022 10:44:33 - INFO - codeparrot_training - Step 38518: {'lr': 0.0004285760848157603, 'samples': 19721728, 'steps': 38518, 'loss/train': 2.0105934143066406} +03/05/2022 10:44:34 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 10:44:39 - INFO - codeparrot_training - Step 38519: {'lr': 0.00042857237092902285, 'samples': 19722240, 'steps': 38519, 'loss/train': 1.2959085702896118} +03/05/2022 10:44:42 - INFO - codeparrot_training - Step 38520: {'lr': 0.0004285686569618235, 'samples': 19722752, 'steps': 38520, 'loss/train': 1.2316933870315552} +03/05/2022 10:44:43 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/05/2022 10:44:47 - INFO - codeparrot_training - Step 38521: {'lr': 0.0004285649429141639, 'samples': 19723264, 'steps': 38521, 'loss/train': 1.1378589868545532} +03/05/2022 10:44:50 - INFO - codeparrot_training - Step 38522: {'lr': 0.00042856122878604566, 'samples': 19723776, 'steps': 38522, 'loss/train': 1.3588716983795166} +03/05/2022 10:44:51 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 10:44:56 - INFO - codeparrot_training - Step 38523: {'lr': 0.0004285575145774705, 'samples': 19724288, 'steps': 38523, 'loss/train': 2.0566439628601074} +03/05/2022 10:44:59 - INFO - codeparrot_training - Step 38524: {'lr': 0.00042855380028844004, 'samples': 19724800, 'steps': 38524, 'loss/train': 0.7114560604095459} +03/05/2022 10:44:59 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/05/2022 10:45:04 - INFO - codeparrot_training - Step 38525: {'lr': 0.00042855008591895607, 'samples': 19725312, 'steps': 38525, 'loss/train': 1.9563223123550415} +03/05/2022 10:45:07 - INFO - codeparrot_training - Step 38526: {'lr': 0.00042854637146902007, 'samples': 19725824, 'steps': 38526, 'loss/train': 1.7133357524871826} +03/05/2022 10:45:08 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/05/2022 10:45:12 - INFO - codeparrot_training - Step 38527: {'lr': 0.00042854265693863394, 'samples': 19726336, 'steps': 38527, 'loss/train': 1.6425504684448242} +03/05/2022 10:45:16 - INFO - codeparrot_training - Step 38528: {'lr': 0.00042853894232779924, 'samples': 19726848, 'steps': 38528, 'loss/train': 1.8225189447402954} +03/05/2022 10:45:16 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 10:45:21 - INFO - codeparrot_training - Step 38529: {'lr': 0.00042853522763651767, 'samples': 19727360, 'steps': 38529, 'loss/train': 1.9709175825119019} +03/05/2022 10:45:24 - INFO - codeparrot_training - Step 38530: {'lr': 0.00042853151286479074, 'samples': 19727872, 'steps': 38530, 'loss/train': 1.6245200634002686} +03/05/2022 10:45:25 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/05/2022 10:45:29 - INFO - codeparrot_training - Step 38531: {'lr': 0.0004285277980126204, 'samples': 19728384, 'steps': 38531, 'loss/train': 1.5797756910324097} +03/05/2022 10:45:33 - INFO - codeparrot_training - Step 38532: {'lr': 0.0004285240830800081, 'samples': 19728896, 'steps': 38532, 'loss/train': 2.071545362472534} +03/05/2022 10:45:33 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/05/2022 10:45:38 - INFO - codeparrot_training - Step 38533: {'lr': 0.00042852036806695565, 'samples': 19729408, 'steps': 38533, 'loss/train': 1.8840982913970947} +03/05/2022 10:45:41 - INFO - codeparrot_training - Step 38534: {'lr': 0.0004285166529734647, 'samples': 19729920, 'steps': 38534, 'loss/train': 2.6631252765655518} +03/05/2022 10:45:42 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/05/2022 10:45:46 - INFO - codeparrot_training - Step 38535: {'lr': 0.0004285129377995369, 'samples': 19730432, 'steps': 38535, 'loss/train': 1.1916284561157227} +03/05/2022 10:45:50 - INFO - codeparrot_training - Step 38536: {'lr': 0.0004285092225451739, 'samples': 19730944, 'steps': 38536, 'loss/train': 1.9457544088363647} +03/05/2022 10:45:50 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 10:45:55 - INFO - codeparrot_training - Step 38537: {'lr': 0.0004285055072103774, 'samples': 19731456, 'steps': 38537, 'loss/train': 1.4813554286956787} +03/05/2022 10:45:58 - INFO - codeparrot_training - Step 38538: {'lr': 0.00042850179179514906, 'samples': 19731968, 'steps': 38538, 'loss/train': 1.2885469198226929} +03/05/2022 10:45:59 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 10:46:03 - INFO - codeparrot_training - Step 38539: {'lr': 0.00042849807629949057, 'samples': 19732480, 'steps': 38539, 'loss/train': 3.231980562210083} +03/05/2022 10:46:06 - INFO - codeparrot_training - Step 38540: {'lr': 0.0004284943607234036, 'samples': 19732992, 'steps': 38540, 'loss/train': 1.4474661350250244} +03/05/2022 10:46:07 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 10:46:12 - INFO - codeparrot_training - Step 38541: {'lr': 0.00042849064506688984, 'samples': 19733504, 'steps': 38541, 'loss/train': 1.5721244812011719} +03/05/2022 10:46:15 - INFO - codeparrot_training - Step 38542: {'lr': 0.00042848692932995094, 'samples': 19734016, 'steps': 38542, 'loss/train': 1.6909945011138916} +03/05/2022 10:46:15 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/05/2022 10:46:20 - INFO - codeparrot_training - Step 38543: {'lr': 0.0004284832135125886, 'samples': 19734528, 'steps': 38543, 'loss/train': 2.103565216064453} +03/05/2022 10:46:24 - INFO - codeparrot_training - Step 38544: {'lr': 0.0004284794976148044, 'samples': 19735040, 'steps': 38544, 'loss/train': 0.9235674738883972} +03/05/2022 10:46:25 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 10:46:29 - INFO - codeparrot_training - Step 38545: {'lr': 0.00042847578163660016, 'samples': 19735552, 'steps': 38545, 'loss/train': 1.887497901916504} +03/05/2022 10:46:32 - INFO - codeparrot_training - Step 38546: {'lr': 0.0004284720655779775, 'samples': 19736064, 'steps': 38546, 'loss/train': 1.1920284032821655} +03/05/2022 10:46:34 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 10:46:37 - INFO - codeparrot_training - Step 38547: {'lr': 0.00042846834943893806, 'samples': 19736576, 'steps': 38547, 'loss/train': 1.0208882093429565} +03/05/2022 10:46:41 - INFO - codeparrot_training - Step 38548: {'lr': 0.0004284646332194836, 'samples': 19737088, 'steps': 38548, 'loss/train': 1.93416166305542} +03/05/2022 10:46:42 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/05/2022 10:46:46 - INFO - codeparrot_training - Step 38549: {'lr': 0.0004284609169196156, 'samples': 19737600, 'steps': 38549, 'loss/train': 1.3960152864456177} +03/05/2022 10:46:49 - INFO - codeparrot_training - Step 38550: {'lr': 0.000428457200539336, 'samples': 19738112, 'steps': 38550, 'loss/train': 1.6571820974349976} +03/05/2022 10:46:50 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/05/2022 10:46:54 - INFO - codeparrot_training - Step 38551: {'lr': 0.0004284534840786463, 'samples': 19738624, 'steps': 38551, 'loss/train': 2.549018621444702} +03/05/2022 10:46:57 - INFO - codeparrot_training - Step 38552: {'lr': 0.0004284497675375482, 'samples': 19739136, 'steps': 38552, 'loss/train': 1.8719671964645386} +03/05/2022 10:46:59 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/05/2022 10:47:03 - INFO - codeparrot_training - Step 38553: {'lr': 0.0004284460509160433, 'samples': 19739648, 'steps': 38553, 'loss/train': 1.4957520961761475} +03/05/2022 10:47:06 - INFO - codeparrot_training - Step 38554: {'lr': 0.0004284423342141335, 'samples': 19740160, 'steps': 38554, 'loss/train': 1.7202802896499634} +03/05/2022 10:47:07 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 10:47:11 - INFO - codeparrot_training - Step 38555: {'lr': 0.0004284386174318202, 'samples': 19740672, 'steps': 38555, 'loss/train': 1.4221858978271484} +03/05/2022 10:47:14 - INFO - codeparrot_training - Step 38556: {'lr': 0.00042843490056910534, 'samples': 19741184, 'steps': 38556, 'loss/train': 2.270695447921753} +03/05/2022 10:47:16 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 10:47:20 - INFO - codeparrot_training - Step 38557: {'lr': 0.00042843118362599045, 'samples': 19741696, 'steps': 38557, 'loss/train': 0.9776784777641296} +03/05/2022 10:47:23 - INFO - codeparrot_training - Step 38558: {'lr': 0.0004284274666024772, 'samples': 19742208, 'steps': 38558, 'loss/train': 1.7199122905731201} +03/05/2022 10:47:25 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/05/2022 10:47:28 - INFO - codeparrot_training - Step 38559: {'lr': 0.0004284237494985672, 'samples': 19742720, 'steps': 38559, 'loss/train': 2.3708810806274414} +03/05/2022 10:47:31 - INFO - codeparrot_training - Step 38560: {'lr': 0.0004284200323142623, 'samples': 19743232, 'steps': 38560, 'loss/train': 1.0397006273269653} +03/05/2022 10:47:33 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 10:47:36 - INFO - codeparrot_training - Step 38561: {'lr': 0.0004284163150495641, 'samples': 19743744, 'steps': 38561, 'loss/train': 1.5603466033935547} +03/05/2022 10:47:40 - INFO - codeparrot_training - Step 38562: {'lr': 0.00042841259770447427, 'samples': 19744256, 'steps': 38562, 'loss/train': 1.3002595901489258} +03/05/2022 10:47:42 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/05/2022 10:47:45 - INFO - codeparrot_training - Step 38563: {'lr': 0.00042840888027899436, 'samples': 19744768, 'steps': 38563, 'loss/train': 1.3120273351669312} +03/05/2022 10:47:48 - INFO - codeparrot_training - Step 38564: {'lr': 0.0004284051627731263, 'samples': 19745280, 'steps': 38564, 'loss/train': 1.7188363075256348} +03/05/2022 10:47:50 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 10:47:53 - INFO - codeparrot_training - Step 38565: {'lr': 0.0004284014451868716, 'samples': 19745792, 'steps': 38565, 'loss/train': 1.7081656455993652} +03/05/2022 10:47:57 - INFO - codeparrot_training - Step 38566: {'lr': 0.0004283977275202319, 'samples': 19746304, 'steps': 38566, 'loss/train': 1.6099004745483398} +03/05/2022 10:47:59 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 10:48:02 - INFO - codeparrot_training - Step 38567: {'lr': 0.00042839400977320895, 'samples': 19746816, 'steps': 38567, 'loss/train': 2.3459300994873047} +03/05/2022 10:48:05 - INFO - codeparrot_training - Step 38568: {'lr': 0.00042839029194580446, 'samples': 19747328, 'steps': 38568, 'loss/train': 1.554349422454834} +03/05/2022 10:48:08 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 10:48:10 - INFO - codeparrot_training - Step 38569: {'lr': 0.0004283865740380201, 'samples': 19747840, 'steps': 38569, 'loss/train': 0.7890869975090027} +03/05/2022 10:48:13 - INFO - codeparrot_training - Step 38570: {'lr': 0.0004283828560498574, 'samples': 19748352, 'steps': 38570, 'loss/train': 1.869735598564148} +03/05/2022 10:48:16 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/05/2022 10:48:19 - INFO - codeparrot_training - Step 38571: {'lr': 0.0004283791379813181, 'samples': 19748864, 'steps': 38571, 'loss/train': 1.8651947975158691} +03/05/2022 10:48:22 - INFO - codeparrot_training - Step 38572: {'lr': 0.000428375419832404, 'samples': 19749376, 'steps': 38572, 'loss/train': 1.8675826787948608} +03/05/2022 10:48:24 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 10:48:27 - INFO - codeparrot_training - Step 38573: {'lr': 0.0004283717016031167, 'samples': 19749888, 'steps': 38573, 'loss/train': 2.1053853034973145} +03/05/2022 10:48:30 - INFO - codeparrot_training - Step 38574: {'lr': 0.0004283679832934578, 'samples': 19750400, 'steps': 38574, 'loss/train': 0.6932262182235718} +03/05/2022 10:48:33 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/05/2022 10:48:36 - INFO - codeparrot_training - Step 38575: {'lr': 0.0004283642649034291, 'samples': 19750912, 'steps': 38575, 'loss/train': 0.21700908243656158} +03/05/2022 10:48:39 - INFO - codeparrot_training - Step 38576: {'lr': 0.00042836054643303226, 'samples': 19751424, 'steps': 38576, 'loss/train': 1.4597054719924927} +03/05/2022 10:48:41 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 10:48:44 - INFO - codeparrot_training - Step 38577: {'lr': 0.0004283568278822688, 'samples': 19751936, 'steps': 38577, 'loss/train': 1.5350946187973022} +03/05/2022 10:48:47 - INFO - codeparrot_training - Step 38578: {'lr': 0.0004283531092511405, 'samples': 19752448, 'steps': 38578, 'loss/train': 2.6010589599609375} +03/05/2022 10:48:50 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 10:48:52 - INFO - codeparrot_training - Step 38579: {'lr': 0.0004283493905396491, 'samples': 19752960, 'steps': 38579, 'loss/train': 1.8193868398666382} +03/05/2022 10:48:56 - INFO - codeparrot_training - Step 38580: {'lr': 0.00042834567174779623, 'samples': 19753472, 'steps': 38580, 'loss/train': 1.417906403541565} +03/05/2022 10:48:58 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/05/2022 10:49:01 - INFO - codeparrot_training - Step 38581: {'lr': 0.00042834195287558356, 'samples': 19753984, 'steps': 38581, 'loss/train': 2.005946636199951} +03/05/2022 10:49:04 - INFO - codeparrot_training - Step 38582: {'lr': 0.00042833823392301264, 'samples': 19754496, 'steps': 38582, 'loss/train': 1.4937937259674072} +03/05/2022 10:49:07 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/05/2022 10:49:09 - INFO - codeparrot_training - Step 38583: {'lr': 0.00042833451489008537, 'samples': 19755008, 'steps': 38583, 'loss/train': 2.0779855251312256} +03/05/2022 10:49:13 - INFO - codeparrot_training - Step 38584: {'lr': 0.00042833079577680327, 'samples': 19755520, 'steps': 38584, 'loss/train': 1.7298463582992554} +03/05/2022 10:49:15 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 10:49:18 - INFO - codeparrot_training - Step 38585: {'lr': 0.0004283270765831682, 'samples': 19756032, 'steps': 38585, 'loss/train': 1.1656309366226196} +03/05/2022 10:49:21 - INFO - codeparrot_training - Step 38586: {'lr': 0.00042832335730918147, 'samples': 19756544, 'steps': 38586, 'loss/train': 1.3866567611694336} +03/05/2022 10:49:23 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 10:49:26 - INFO - codeparrot_training - Step 38587: {'lr': 0.0004283196379548451, 'samples': 19757056, 'steps': 38587, 'loss/train': 1.151247501373291} +03/05/2022 10:49:29 - INFO - codeparrot_training - Step 38588: {'lr': 0.0004283159185201607, 'samples': 19757568, 'steps': 38588, 'loss/train': 0.9604315161705017} +03/05/2022 10:49:32 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 10:49:35 - INFO - codeparrot_training - Step 38589: {'lr': 0.00042831219900512984, 'samples': 19758080, 'steps': 38589, 'loss/train': 1.278340220451355} +03/05/2022 10:49:38 - INFO - codeparrot_training - Step 38590: {'lr': 0.0004283084794097543, 'samples': 19758592, 'steps': 38590, 'loss/train': 2.15201997756958} +03/05/2022 10:49:40 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/05/2022 10:49:43 - INFO - codeparrot_training - Step 38591: {'lr': 0.00042830475973403573, 'samples': 19759104, 'steps': 38591, 'loss/train': 1.961279273033142} +03/05/2022 10:49:46 - INFO - codeparrot_training - Step 38592: {'lr': 0.0004283010399779757, 'samples': 19759616, 'steps': 38592, 'loss/train': 1.8723347187042236} +03/05/2022 10:49:49 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 10:49:52 - INFO - codeparrot_training - Step 38593: {'lr': 0.000428297320141576, 'samples': 19760128, 'steps': 38593, 'loss/train': 1.3374814987182617} +03/05/2022 10:49:55 - INFO - codeparrot_training - Step 38594: {'lr': 0.0004282936002248383, 'samples': 19760640, 'steps': 38594, 'loss/train': 1.701419472694397} +03/05/2022 10:49:57 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/05/2022 10:50:00 - INFO - codeparrot_training - Step 38595: {'lr': 0.00042828988022776426, 'samples': 19761152, 'steps': 38595, 'loss/train': 1.9581539630889893} +03/05/2022 10:50:03 - INFO - codeparrot_training - Step 38596: {'lr': 0.00042828616015035554, 'samples': 19761664, 'steps': 38596, 'loss/train': 2.0897247791290283} +03/05/2022 10:50:05 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 10:50:08 - INFO - codeparrot_training - Step 38597: {'lr': 0.00042828243999261384, 'samples': 19762176, 'steps': 38597, 'loss/train': 1.3530045747756958} +03/05/2022 10:50:12 - INFO - codeparrot_training - Step 38598: {'lr': 0.0004282787197545408, 'samples': 19762688, 'steps': 38598, 'loss/train': 1.7269246578216553} +03/05/2022 10:50:14 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/05/2022 10:50:17 - INFO - codeparrot_training - Step 38599: {'lr': 0.00042827499943613815, 'samples': 19763200, 'steps': 38599, 'loss/train': 1.3811802864074707} +03/05/2022 10:50:20 - INFO - codeparrot_training - Step 38600: {'lr': 0.00042827127903740747, 'samples': 19763712, 'steps': 38600, 'loss/train': 1.5921295881271362} +03/05/2022 10:50:22 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/05/2022 10:50:25 - INFO - codeparrot_training - Step 38601: {'lr': 0.00042826755855835053, 'samples': 19764224, 'steps': 38601, 'loss/train': 2.2554805278778076} +03/05/2022 10:50:28 - INFO - codeparrot_training - Step 38602: {'lr': 0.00042826383799896906, 'samples': 19764736, 'steps': 38602, 'loss/train': 1.2390189170837402} +03/05/2022 10:50:30 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 10:50:34 - INFO - codeparrot_training - Step 38603: {'lr': 0.0004282601173592646, 'samples': 19765248, 'steps': 38603, 'loss/train': 1.8256912231445312} +03/05/2022 10:50:37 - INFO - codeparrot_training - Step 38604: {'lr': 0.0004282563966392389, 'samples': 19765760, 'steps': 38604, 'loss/train': 1.633636236190796} +03/05/2022 10:50:39 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 10:50:42 - INFO - codeparrot_training - Step 38605: {'lr': 0.00042825267583889354, 'samples': 19766272, 'steps': 38605, 'loss/train': 1.9993391036987305} +03/05/2022 10:50:45 - INFO - codeparrot_training - Step 38606: {'lr': 0.00042824895495823033, 'samples': 19766784, 'steps': 38606, 'loss/train': 1.3982343673706055} +03/05/2022 10:50:47 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/05/2022 10:50:51 - INFO - codeparrot_training - Step 38607: {'lr': 0.0004282452339972509, 'samples': 19767296, 'steps': 38607, 'loss/train': 0.9934396743774414} +03/05/2022 10:50:54 - INFO - codeparrot_training - Step 38608: {'lr': 0.00042824151295595695, 'samples': 19767808, 'steps': 38608, 'loss/train': 1.7043061256408691} +03/05/2022 10:50:56 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 10:50:59 - INFO - codeparrot_training - Step 38609: {'lr': 0.0004282377918343501, 'samples': 19768320, 'steps': 38609, 'loss/train': 1.615700125694275} +03/05/2022 10:51:02 - INFO - codeparrot_training - Step 38610: {'lr': 0.00042823407063243197, 'samples': 19768832, 'steps': 38610, 'loss/train': 2.120128870010376} +03/05/2022 10:51:04 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/05/2022 10:51:08 - INFO - codeparrot_training - Step 38611: {'lr': 0.0004282303493502044, 'samples': 19769344, 'steps': 38611, 'loss/train': 1.086856484413147} +03/05/2022 10:51:11 - INFO - codeparrot_training - Step 38612: {'lr': 0.000428226627987669, 'samples': 19769856, 'steps': 38612, 'loss/train': 2.3484725952148438} +03/05/2022 10:51:13 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/05/2022 10:51:16 - INFO - codeparrot_training - Step 38613: {'lr': 0.0004282229065448273, 'samples': 19770368, 'steps': 38613, 'loss/train': 1.9675817489624023} +03/05/2022 10:51:19 - INFO - codeparrot_training - Step 38614: {'lr': 0.0004282191850216812, 'samples': 19770880, 'steps': 38614, 'loss/train': 1.7232468128204346} +03/05/2022 10:51:21 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 10:51:25 - INFO - codeparrot_training - Step 38615: {'lr': 0.00042821546341823236, 'samples': 19771392, 'steps': 38615, 'loss/train': 1.9659581184387207} +03/05/2022 10:51:28 - INFO - codeparrot_training - Step 38616: {'lr': 0.0004282117417344823, 'samples': 19771904, 'steps': 38616, 'loss/train': 1.4936696290969849} +03/05/2022 10:51:29 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 10:51:33 - INFO - codeparrot_training - Step 38617: {'lr': 0.00042820801997043277, 'samples': 19772416, 'steps': 38617, 'loss/train': 1.7274147272109985} +03/05/2022 10:51:36 - INFO - codeparrot_training - Step 38618: {'lr': 0.0004282042981260855, 'samples': 19772928, 'steps': 38618, 'loss/train': 1.0664881467819214} +03/05/2022 10:51:38 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 10:51:41 - INFO - codeparrot_training - Step 38619: {'lr': 0.00042820057620144214, 'samples': 19773440, 'steps': 38619, 'loss/train': 1.968578815460205} +03/05/2022 10:51:45 - INFO - codeparrot_training - Step 38620: {'lr': 0.00042819685419650427, 'samples': 19773952, 'steps': 38620, 'loss/train': 1.3703964948654175} +03/05/2022 10:51:46 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/05/2022 10:51:50 - INFO - codeparrot_training - Step 38621: {'lr': 0.0004281931321112737, 'samples': 19774464, 'steps': 38621, 'loss/train': 2.0736982822418213} +03/05/2022 10:51:53 - INFO - codeparrot_training - Step 38622: {'lr': 0.0004281894099457521, 'samples': 19774976, 'steps': 38622, 'loss/train': 1.7980878353118896} +03/05/2022 10:51:55 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/05/2022 10:51:58 - INFO - codeparrot_training - Step 38623: {'lr': 0.00042818568769994103, 'samples': 19775488, 'steps': 38623, 'loss/train': 1.9836076498031616} +03/05/2022 10:52:02 - INFO - codeparrot_training - Step 38624: {'lr': 0.00042818196537384225, 'samples': 19776000, 'steps': 38624, 'loss/train': 1.7218974828720093} +03/05/2022 10:52:03 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 10:52:07 - INFO - codeparrot_training - Step 38625: {'lr': 0.0004281782429674574, 'samples': 19776512, 'steps': 38625, 'loss/train': 1.5606465339660645} +03/05/2022 10:52:10 - INFO - codeparrot_training - Step 38626: {'lr': 0.0004281745204807882, 'samples': 19777024, 'steps': 38626, 'loss/train': 1.6349729299545288} +03/05/2022 10:52:11 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 10:52:16 - INFO - codeparrot_training - Step 38627: {'lr': 0.00042817079791383636, 'samples': 19777536, 'steps': 38627, 'loss/train': 1.1947542428970337} +03/05/2022 10:52:19 - INFO - codeparrot_training - Step 38628: {'lr': 0.00042816707526660346, 'samples': 19778048, 'steps': 38628, 'loss/train': 2.1859982013702393} +03/05/2022 10:52:22 - INFO - codeparrot_training - Step 38629: {'lr': 0.00042816335253909125, 'samples': 19778560, 'steps': 38629, 'loss/train': 1.2326762676239014} +03/05/2022 10:52:23 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/05/2022 10:52:27 - INFO - codeparrot_training - Step 38630: {'lr': 0.00042815962973130134, 'samples': 19779072, 'steps': 38630, 'loss/train': 1.9942691326141357} +03/05/2022 10:52:31 - INFO - codeparrot_training - Step 38631: {'lr': 0.00042815590684323554, 'samples': 19779584, 'steps': 38631, 'loss/train': 1.3765051364898682} +03/05/2022 10:52:32 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/05/2022 10:52:36 - INFO - codeparrot_training - Step 38632: {'lr': 0.00042815218387489535, 'samples': 19780096, 'steps': 38632, 'loss/train': 1.4062714576721191} +03/05/2022 10:52:39 - INFO - codeparrot_training - Step 38633: {'lr': 0.00042814846082628256, 'samples': 19780608, 'steps': 38633, 'loss/train': 1.376347541809082} +03/05/2022 10:52:40 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/05/2022 10:52:44 - INFO - codeparrot_training - Step 38634: {'lr': 0.0004281447376973988, 'samples': 19781120, 'steps': 38634, 'loss/train': 0.3564392626285553} +03/05/2022 10:52:48 - INFO - codeparrot_training - Step 38635: {'lr': 0.00042814101448824583, 'samples': 19781632, 'steps': 38635, 'loss/train': 0.6985551714897156} +03/05/2022 10:52:49 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/05/2022 10:52:53 - INFO - codeparrot_training - Step 38636: {'lr': 0.0004281372911988253, 'samples': 19782144, 'steps': 38636, 'loss/train': 1.8874304294586182} +03/05/2022 10:52:56 - INFO - codeparrot_training - Step 38637: {'lr': 0.0004281335678291387, 'samples': 19782656, 'steps': 38637, 'loss/train': 0.5189992785453796} +03/05/2022 10:52:57 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/05/2022 10:53:01 - INFO - codeparrot_training - Step 38638: {'lr': 0.000428129844379188, 'samples': 19783168, 'steps': 38638, 'loss/train': 2.2223095893859863} +03/05/2022 10:53:04 - INFO - codeparrot_training - Step 38639: {'lr': 0.0004281261208489747, 'samples': 19783680, 'steps': 38639, 'loss/train': 1.2589397430419922} +03/05/2022 10:53:06 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 10:53:10 - INFO - codeparrot_training - Step 38640: {'lr': 0.0004281223972385004, 'samples': 19784192, 'steps': 38640, 'loss/train': 2.13358736038208} +03/05/2022 10:53:13 - INFO - codeparrot_training - Step 38641: {'lr': 0.00042811867354776705, 'samples': 19784704, 'steps': 38641, 'loss/train': 1.143943428993225} +03/05/2022 10:53:14 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/05/2022 10:53:18 - INFO - codeparrot_training - Step 38642: {'lr': 0.0004281149497767761, 'samples': 19785216, 'steps': 38642, 'loss/train': 2.351257801055908} +03/05/2022 10:53:21 - INFO - codeparrot_training - Step 38643: {'lr': 0.00042811122592552943, 'samples': 19785728, 'steps': 38643, 'loss/train': 1.195041298866272} +03/05/2022 10:53:23 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 10:53:27 - INFO - codeparrot_training - Step 38644: {'lr': 0.0004281075019940285, 'samples': 19786240, 'steps': 38644, 'loss/train': 1.746843695640564} +03/05/2022 10:53:30 - INFO - codeparrot_training - Step 38645: {'lr': 0.00042810377798227506, 'samples': 19786752, 'steps': 38645, 'loss/train': 1.6408380270004272} +03/05/2022 10:53:32 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/05/2022 10:53:35 - INFO - codeparrot_training - Step 38646: {'lr': 0.00042810005389027077, 'samples': 19787264, 'steps': 38646, 'loss/train': 1.030151128768921} +03/05/2022 10:53:38 - INFO - codeparrot_training - Step 38647: {'lr': 0.0004280963297180174, 'samples': 19787776, 'steps': 38647, 'loss/train': 2.090975761413574} +03/05/2022 10:53:40 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/05/2022 10:53:44 - INFO - codeparrot_training - Step 38648: {'lr': 0.0004280926054655165, 'samples': 19788288, 'steps': 38648, 'loss/train': 1.7622849941253662} +03/05/2022 10:53:47 - INFO - codeparrot_training - Step 38649: {'lr': 0.00042808888113277, 'samples': 19788800, 'steps': 38649, 'loss/train': 1.4865052700042725} +03/05/2022 10:53:48 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 10:53:52 - INFO - codeparrot_training - Step 38650: {'lr': 0.0004280851567197792, 'samples': 19789312, 'steps': 38650, 'loss/train': 2.0519843101501465} +03/05/2022 10:53:55 - INFO - codeparrot_training - Step 38651: {'lr': 0.0004280814322265461, 'samples': 19789824, 'steps': 38651, 'loss/train': 1.7303141355514526} +03/05/2022 10:53:57 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/05/2022 10:54:00 - INFO - codeparrot_training - Step 38652: {'lr': 0.00042807770765307217, 'samples': 19790336, 'steps': 38652, 'loss/train': 1.8099050521850586} +03/05/2022 10:54:03 - INFO - codeparrot_training - Step 38653: {'lr': 0.00042807398299935927, 'samples': 19790848, 'steps': 38653, 'loss/train': 1.7953767776489258} +03/05/2022 10:54:05 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 10:54:09 - INFO - codeparrot_training - Step 38654: {'lr': 0.0004280702582654089, 'samples': 19791360, 'steps': 38654, 'loss/train': 1.7514424324035645} +03/05/2022 10:54:12 - INFO - codeparrot_training - Step 38655: {'lr': 0.00042806653345122287, 'samples': 19791872, 'steps': 38655, 'loss/train': 2.2963011264801025} +03/05/2022 10:54:13 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 10:54:18 - INFO - codeparrot_training - Step 38656: {'lr': 0.0004280628085568028, 'samples': 19792384, 'steps': 38656, 'loss/train': 2.1349875926971436} +03/05/2022 10:54:21 - INFO - codeparrot_training - Step 38657: {'lr': 0.0004280590835821503, 'samples': 19792896, 'steps': 38657, 'loss/train': 1.9335001707077026} +03/05/2022 10:54:24 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/05/2022 10:54:26 - INFO - codeparrot_training - Step 38658: {'lr': 0.0004280553585272672, 'samples': 19793408, 'steps': 38658, 'loss/train': 2.258563756942749} +03/05/2022 10:54:29 - INFO - codeparrot_training - Step 38659: {'lr': 0.0004280516333921551, 'samples': 19793920, 'steps': 38659, 'loss/train': 1.7108283042907715} +03/05/2022 10:54:32 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/05/2022 10:54:34 - INFO - codeparrot_training - Step 38660: {'lr': 0.00042804790817681574, 'samples': 19794432, 'steps': 38660, 'loss/train': 1.5322532653808594} +03/05/2022 10:54:38 - INFO - codeparrot_training - Step 38661: {'lr': 0.0004280441828812506, 'samples': 19794944, 'steps': 38661, 'loss/train': 2.0229928493499756} +03/05/2022 10:54:40 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/05/2022 10:54:43 - INFO - codeparrot_training - Step 38662: {'lr': 0.0004280404575054616, 'samples': 19795456, 'steps': 38662, 'loss/train': 2.127798557281494} +03/05/2022 10:54:46 - INFO - codeparrot_training - Step 38663: {'lr': 0.00042803673204945027, 'samples': 19795968, 'steps': 38663, 'loss/train': 1.6988143920898438} +03/05/2022 10:54:49 - INFO - codeparrot_training - Step 38664: {'lr': 0.0004280330065132184, 'samples': 19796480, 'steps': 38664, 'loss/train': 1.4765651226043701} +03/05/2022 10:54:49 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 10:54:55 - INFO - codeparrot_training - Step 38665: {'lr': 0.0004280292808967675, 'samples': 19796992, 'steps': 38665, 'loss/train': 1.7284808158874512} +03/05/2022 10:54:58 - INFO - codeparrot_training - Step 38666: {'lr': 0.00042802555520009945, 'samples': 19797504, 'steps': 38666, 'loss/train': 1.4485489130020142} +03/05/2022 10:54:58 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/05/2022 10:55:03 - INFO - codeparrot_training - Step 38667: {'lr': 0.00042802182942321576, 'samples': 19798016, 'steps': 38667, 'loss/train': 1.9016871452331543} +03/05/2022 10:55:06 - INFO - codeparrot_training - Step 38668: {'lr': 0.0004280181035661182, 'samples': 19798528, 'steps': 38668, 'loss/train': 1.8893183469772339} +03/05/2022 10:55:06 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/05/2022 10:55:12 - INFO - codeparrot_training - Step 38669: {'lr': 0.0004280143776288085, 'samples': 19799040, 'steps': 38669, 'loss/train': 1.8285330533981323} +03/05/2022 10:55:15 - INFO - codeparrot_training - Step 38670: {'lr': 0.00042801065161128814, 'samples': 19799552, 'steps': 38670, 'loss/train': 1.621121883392334} +03/05/2022 10:55:15 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 10:55:20 - INFO - codeparrot_training - Step 38671: {'lr': 0.000428006925513559, 'samples': 19800064, 'steps': 38671, 'loss/train': 1.0969856977462769} +03/05/2022 10:55:23 - INFO - codeparrot_training - Step 38672: {'lr': 0.0004280031993356227, 'samples': 19800576, 'steps': 38672, 'loss/train': 1.7955394983291626} +03/05/2022 10:55:23 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/05/2022 10:55:29 - INFO - codeparrot_training - Step 38673: {'lr': 0.00042799947307748087, 'samples': 19801088, 'steps': 38673, 'loss/train': 1.721825361251831} +03/05/2022 10:55:32 - INFO - codeparrot_training - Step 38674: {'lr': 0.0004279957467391353, 'samples': 19801600, 'steps': 38674, 'loss/train': 1.8524703979492188} +03/05/2022 10:55:32 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/05/2022 10:55:37 - INFO - codeparrot_training - Step 38675: {'lr': 0.0004279920203205875, 'samples': 19802112, 'steps': 38675, 'loss/train': 1.9428575038909912} +03/05/2022 10:55:40 - INFO - codeparrot_training - Step 38676: {'lr': 0.0004279882938218393, 'samples': 19802624, 'steps': 38676, 'loss/train': 1.6895110607147217} +03/05/2022 10:55:41 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 10:55:46 - INFO - codeparrot_training - Step 38677: {'lr': 0.00042798456724289227, 'samples': 19803136, 'steps': 38677, 'loss/train': 1.3026678562164307} +03/05/2022 10:55:49 - INFO - codeparrot_training - Step 38678: {'lr': 0.0004279808405837482, 'samples': 19803648, 'steps': 38678, 'loss/train': 1.7309794425964355} +03/05/2022 10:55:49 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/05/2022 10:55:54 - INFO - codeparrot_training - Step 38679: {'lr': 0.00042797711384440863, 'samples': 19804160, 'steps': 38679, 'loss/train': 0.68036288022995} +03/05/2022 10:55:57 - INFO - codeparrot_training - Step 38680: {'lr': 0.0004279733870248754, 'samples': 19804672, 'steps': 38680, 'loss/train': 0.11509339511394501} +03/05/2022 10:55:58 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/05/2022 10:56:03 - INFO - codeparrot_training - Step 38681: {'lr': 0.00042796966012515007, 'samples': 19805184, 'steps': 38681, 'loss/train': 1.5195715427398682} +03/05/2022 10:56:06 - INFO - codeparrot_training - Step 38682: {'lr': 0.00042796593314523435, 'samples': 19805696, 'steps': 38682, 'loss/train': 1.5905025005340576} +03/05/2022 10:56:06 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/05/2022 10:56:11 - INFO - codeparrot_training - Step 38683: {'lr': 0.0004279622060851299, 'samples': 19806208, 'steps': 38683, 'loss/train': 1.454941749572754} +03/05/2022 10:56:14 - INFO - codeparrot_training - Step 38684: {'lr': 0.0004279584789448385, 'samples': 19806720, 'steps': 38684, 'loss/train': 1.9202710390090942} +03/05/2022 10:56:14 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/05/2022 10:56:20 - INFO - codeparrot_training - Step 38685: {'lr': 0.0004279547517243617, 'samples': 19807232, 'steps': 38685, 'loss/train': 1.9013594388961792} +03/05/2022 10:56:23 - INFO - codeparrot_training - Step 38686: {'lr': 0.00042795102442370127, 'samples': 19807744, 'steps': 38686, 'loss/train': 1.8321306705474854} +03/05/2022 10:56:25 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/05/2022 10:56:28 - INFO - codeparrot_training - Step 38687: {'lr': 0.0004279472970428588, 'samples': 19808256, 'steps': 38687, 'loss/train': 0.45099449157714844} +03/05/2022 10:56:31 - INFO - codeparrot_training - Step 38688: {'lr': 0.0004279435695818361, 'samples': 19808768, 'steps': 38688, 'loss/train': 2.146911144256592} +03/05/2022 10:56:33 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/05/2022 10:56:37 - INFO - codeparrot_training - Step 38689: {'lr': 0.00042793984204063477, 'samples': 19809280, 'steps': 38689, 'loss/train': 0.9639469385147095} +03/05/2022 10:56:40 - INFO - codeparrot_training - Step 38690: {'lr': 0.0004279361144192565, 'samples': 19809792, 'steps': 38690, 'loss/train': 2.060373306274414} +03/05/2022 10:56:42 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/05/2022 10:56:45 - INFO - codeparrot_training - Step 38691: {'lr': 0.00042793238671770285, 'samples': 19810304, 'steps': 38691, 'loss/train': 1.6125714778900146} +03/05/2022 10:56:48 - INFO - codeparrot_training - Step 38692: {'lr': 0.0004279286589359757, 'samples': 19810816, 'steps': 38692, 'loss/train': 2.0662548542022705} +03/05/2022 10:56:50 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/05/2022 10:56:53 - INFO - codeparrot_training - Step 38693: {'lr': 0.00042792493107407666, 'samples': 19811328, 'steps': 38693, 'loss/train': 1.3314414024353027} +03/05/2022 10:56:57 - INFO - codeparrot_training - Step 38694: {'lr': 0.0004279212031320073, 'samples': 19811840, 'steps': 38694, 'loss/train': 1.3986852169036865} +03/05/2022 10:56:58 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/05/2022 10:57:02 - INFO - codeparrot_training - Step 38695: {'lr': 0.00042791747510976955, 'samples': 19812352, 'steps': 38695, 'loss/train': 2.0740137100219727} +03/05/2022 10:57:05 - INFO - codeparrot_training - Step 38696: {'lr': 0.0004279137470073648, 'samples': 19812864, 'steps': 38696, 'loss/train': 1.3599504232406616} +03/05/2022 10:57:07 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/05/2022 10:57:10 - INFO - codeparrot_training - Step 38697: {'lr': 0.00042791001882479485, 'samples': 19813376, 'steps': 38697, 'loss/train': 1.604630470275879} +03/05/2022 10:57:13 - INFO - codeparrot_training - Step 38698: {'lr': 0.0004279062905620614, 'samples': 19813888, 'steps': 38698, 'loss/train': 2.0248091220855713} +03/05/2022 10:57:15 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 10:57:19 - INFO - codeparrot_training - Step 38699: {'lr': 0.0004279025622191662, 'samples': 19814400, 'steps': 38699, 'loss/train': 1.6676723957061768} +03/05/2022 10:57:22 - INFO - codeparrot_training - Step 38700: {'lr': 0.00042789883379611084, 'samples': 19814912, 'steps': 38700, 'loss/train': 1.0747770071029663} +03/05/2022 10:57:23 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/05/2022 10:57:27 - INFO - codeparrot_training - Step 38701: {'lr': 0.000427895105292897, 'samples': 19815424, 'steps': 38701, 'loss/train': 2.4207370281219482} +03/05/2022 10:57:30 - INFO - codeparrot_training - Step 38702: {'lr': 0.00042789137670952627, 'samples': 19815936, 'steps': 38702, 'loss/train': 1.5905373096466064} +03/05/2022 10:57:32 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/05/2022 10:57:36 - INFO - codeparrot_training - Step 38703: {'lr': 0.00042788764804600055, 'samples': 19816448, 'steps': 38703, 'loss/train': 1.7638393640518188} +03/05/2022 10:57:39 - INFO - codeparrot_training - Step 38704: {'lr': 0.0004278839193023214, 'samples': 19816960, 'steps': 38704, 'loss/train': 2.8547465801239014} +03/05/2022 10:57:40 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/05/2022 10:57:44 - INFO - codeparrot_training - Step 38705: {'lr': 0.0004278801904784904, 'samples': 19817472, 'steps': 38705, 'loss/train': 1.4887999296188354} +03/05/2022 10:57:47 - INFO - codeparrot_training - Step 38706: {'lr': 0.00042787646157450946, 'samples': 19817984, 'steps': 38706, 'loss/train': 1.9794522523880005} +03/05/2022 10:57:49 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/05/2022 10:57:53 - INFO - codeparrot_training - Step 38707: {'lr': 0.00042787273259038, 'samples': 19818496, 'steps': 38707, 'loss/train': 1.415533185005188} +03/05/2022 10:57:56 - INFO - codeparrot_training - Step 38708: {'lr': 0.00042786900352610393, 'samples': 19819008, 'steps': 38708, 'loss/train': 1.7790294885635376} +03/05/2022 10:57:57 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/05/2022 10:58:01 - INFO - codeparrot_training - Step 38709: {'lr': 0.0004278652743816828, 'samples': 19819520, 'steps': 38709, 'loss/train': 1.8970950841903687} +03/05/2022 10:58:05 - INFO - codeparrot_training - Step 38710: {'lr': 0.00042786154515711826, 'samples': 19820032, 'steps': 38710, 'loss/train': 0.8318291306495667} +03/05/2022 10:58:08 - INFO - codeparrot_training - Step 38711: {'lr': 0.0004278578158524121, 'samples': 19820544, 'steps': 38711, 'loss/train': 1.8872073888778687} +03/05/2022 10:58:08 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/05/2022 10:58:13 - INFO - codeparrot_training - Step 38712: {'lr': 0.00042785408646756594, 'samples': 19821056, 'steps': 38712, 'loss/train': 1.2877286672592163} +03/05/2022 10:58:16 - INFO - codeparrot_training - Step 38713: {'lr': 0.0004278503570025816, 'samples': 19821568, 'steps': 38713, 'loss/train': 1.9119843244552612} +03/05/2022 10:58:16 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/05/2022 10:58:21 - INFO - codeparrot_training - Step 38714: {'lr': 0.0004278466274574605, 'samples': 19822080, 'steps': 38714, 'loss/train': 0.7957173585891724} +03/05/2022 10:58:25 - INFO - codeparrot_training - Step 38715: {'lr': 0.0004278428978322044, 'samples': 19822592, 'steps': 38715, 'loss/train': 2.0226762294769287} +03/05/2022 10:58:25 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 10:58:30 - INFO - codeparrot_training - Step 38716: {'lr': 0.00042783916812681516, 'samples': 19823104, 'steps': 38716, 'loss/train': 1.6795921325683594} +03/05/2022 10:58:33 - INFO - codeparrot_training - Step 38717: {'lr': 0.0004278354383412943, 'samples': 19823616, 'steps': 38717, 'loss/train': 2.1372222900390625} +03/05/2022 10:58:33 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 10:58:38 - INFO - codeparrot_training - Step 38718: {'lr': 0.0004278317084756435, 'samples': 19824128, 'steps': 38718, 'loss/train': 1.7832385301589966} +03/05/2022 10:58:41 - INFO - codeparrot_training - Step 38719: {'lr': 0.00042782797852986454, 'samples': 19824640, 'steps': 38719, 'loss/train': 1.4587024450302124} +03/05/2022 10:58:42 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/05/2022 10:58:47 - INFO - codeparrot_training - Step 38720: {'lr': 0.00042782424850395894, 'samples': 19825152, 'steps': 38720, 'loss/train': 2.503936529159546} +03/05/2022 10:58:50 - INFO - codeparrot_training - Step 38721: {'lr': 0.00042782051839792857, 'samples': 19825664, 'steps': 38721, 'loss/train': 2.0494422912597656} +03/05/2022 10:58:50 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/05/2022 10:58:55 - INFO - codeparrot_training - Step 38722: {'lr': 0.000427816788211775, 'samples': 19826176, 'steps': 38722, 'loss/train': 2.220364570617676} +03/05/2022 10:58:58 - INFO - codeparrot_training - Step 38723: {'lr': 0.00042781305794549994, 'samples': 19826688, 'steps': 38723, 'loss/train': 1.3216047286987305} +03/05/2022 10:58:59 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/05/2022 10:59:04 - INFO - codeparrot_training - Step 38724: {'lr': 0.00042780932759910504, 'samples': 19827200, 'steps': 38724, 'loss/train': 1.5483509302139282} +03/05/2022 10:59:07 - INFO - codeparrot_training - Step 38725: {'lr': 0.00042780559717259194, 'samples': 19827712, 'steps': 38725, 'loss/train': 2.1423680782318115} +03/05/2022 10:59:07 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 10:59:12 - INFO - codeparrot_training - Step 38726: {'lr': 0.0004278018666659624, 'samples': 19828224, 'steps': 38726, 'loss/train': 1.7244632244110107} +03/05/2022 10:59:15 - INFO - codeparrot_training - Step 38727: {'lr': 0.0004277981360792182, 'samples': 19828736, 'steps': 38727, 'loss/train': 1.848299503326416} +03/05/2022 10:59:15 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 10:59:21 - INFO - codeparrot_training - Step 38728: {'lr': 0.0004277944054123608, 'samples': 19829248, 'steps': 38728, 'loss/train': 1.1198168992996216} +03/05/2022 10:59:24 - INFO - codeparrot_training - Step 38729: {'lr': 0.000427790674665392, 'samples': 19829760, 'steps': 38729, 'loss/train': 0.4736887514591217} +03/05/2022 10:59:24 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/05/2022 10:59:29 - INFO - codeparrot_training - Step 38730: {'lr': 0.00042778694383831354, 'samples': 19830272, 'steps': 38730, 'loss/train': 1.9365711212158203} +03/05/2022 10:59:32 - INFO - codeparrot_training - Step 38731: {'lr': 0.0004277832129311269, 'samples': 19830784, 'steps': 38731, 'loss/train': 1.593790054321289} +03/05/2022 10:59:33 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 10:59:38 - INFO - codeparrot_training - Step 38732: {'lr': 0.000427779481943834, 'samples': 19831296, 'steps': 38732, 'loss/train': 1.5448980331420898} +03/05/2022 10:59:41 - INFO - codeparrot_training - Step 38733: {'lr': 0.0004277757508764363, 'samples': 19831808, 'steps': 38733, 'loss/train': 1.2443255186080933} +03/05/2022 10:59:42 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/05/2022 10:59:46 - INFO - codeparrot_training - Step 38734: {'lr': 0.00042777201972893564, 'samples': 19832320, 'steps': 38734, 'loss/train': 2.218656063079834} +03/05/2022 10:59:49 - INFO - codeparrot_training - Step 38735: {'lr': 0.00042776828850133364, 'samples': 19832832, 'steps': 38735, 'loss/train': 1.4080146551132202} +03/05/2022 10:59:51 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 10:59:55 - INFO - codeparrot_training - Step 38736: {'lr': 0.0004277645571936321, 'samples': 19833344, 'steps': 38736, 'loss/train': 1.1761796474456787} +03/05/2022 10:59:58 - INFO - codeparrot_training - Step 38737: {'lr': 0.0004277608258058324, 'samples': 19833856, 'steps': 38737, 'loss/train': 1.2972103357315063} +03/05/2022 10:59:59 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/05/2022 11:00:03 - INFO - codeparrot_training - Step 38738: {'lr': 0.00042775709433793657, 'samples': 19834368, 'steps': 38738, 'loss/train': 2.1323394775390625} +03/05/2022 11:00:06 - INFO - codeparrot_training - Step 38739: {'lr': 0.0004277533627899461, 'samples': 19834880, 'steps': 38739, 'loss/train': 1.8756883144378662} +03/05/2022 11:00:08 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 11:00:11 - INFO - codeparrot_training - Step 38740: {'lr': 0.00042774963116186274, 'samples': 19835392, 'steps': 38740, 'loss/train': 0.9966974258422852} +03/05/2022 11:00:15 - INFO - codeparrot_training - Step 38741: {'lr': 0.000427745899453688, 'samples': 19835904, 'steps': 38741, 'loss/train': 1.609127163887024} +03/05/2022 11:00:16 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/05/2022 11:00:20 - INFO - codeparrot_training - Step 38742: {'lr': 0.00042774216766542386, 'samples': 19836416, 'steps': 38742, 'loss/train': 1.3716981410980225} +03/05/2022 11:00:23 - INFO - codeparrot_training - Step 38743: {'lr': 0.0004277384357970717, 'samples': 19836928, 'steps': 38743, 'loss/train': 1.5405213832855225} +03/05/2022 11:00:24 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/05/2022 11:00:28 - INFO - codeparrot_training - Step 38744: {'lr': 0.00042773470384863344, 'samples': 19837440, 'steps': 38744, 'loss/train': 1.5490622520446777} +03/05/2022 11:00:32 - INFO - codeparrot_training - Step 38745: {'lr': 0.0004277309718201107, 'samples': 19837952, 'steps': 38745, 'loss/train': 1.298251986503601} +03/05/2022 11:00:33 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 11:00:37 - INFO - codeparrot_training - Step 38746: {'lr': 0.000427727239711505, 'samples': 19838464, 'steps': 38746, 'loss/train': 0.7143335938453674} +03/05/2022 11:00:40 - INFO - codeparrot_training - Step 38747: {'lr': 0.00042772350752281823, 'samples': 19838976, 'steps': 38747, 'loss/train': 1.5477380752563477} +03/05/2022 11:00:41 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 11:00:45 - INFO - codeparrot_training - Step 38748: {'lr': 0.000427719775254052, 'samples': 19839488, 'steps': 38748, 'loss/train': 0.8484777808189392} +03/05/2022 11:00:48 - INFO - codeparrot_training - Step 38749: {'lr': 0.00042771604290520795, 'samples': 19840000, 'steps': 38749, 'loss/train': 2.1488730907440186} +03/05/2022 11:00:50 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/05/2022 11:00:54 - INFO - codeparrot_training - Step 38750: {'lr': 0.00042771231047628776, 'samples': 19840512, 'steps': 38750, 'loss/train': 1.8218976259231567} +03/05/2022 11:00:57 - INFO - codeparrot_training - Step 38751: {'lr': 0.0004277085779672932, 'samples': 19841024, 'steps': 38751, 'loss/train': 1.9549795389175415} +03/05/2022 11:00:58 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/05/2022 11:01:02 - INFO - codeparrot_training - Step 38752: {'lr': 0.0004277048453782259, 'samples': 19841536, 'steps': 38752, 'loss/train': 1.8467975854873657} +03/05/2022 11:01:05 - INFO - codeparrot_training - Step 38753: {'lr': 0.0004277011127090875, 'samples': 19842048, 'steps': 38753, 'loss/train': 0.9890680909156799} +03/05/2022 11:01:06 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/05/2022 11:01:11 - INFO - codeparrot_training - Step 38754: {'lr': 0.0004276973799598798, 'samples': 19842560, 'steps': 38754, 'loss/train': 1.738882303237915} +03/05/2022 11:01:14 - INFO - codeparrot_training - Step 38755: {'lr': 0.0004276936471306043, 'samples': 19843072, 'steps': 38755, 'loss/train': 2.3517298698425293} +03/05/2022 11:01:14 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 11:01:19 - INFO - codeparrot_training - Step 38756: {'lr': 0.00042768991422126285, 'samples': 19843584, 'steps': 38756, 'loss/train': 1.0692189931869507} +03/05/2022 11:01:22 - INFO - codeparrot_training - Step 38757: {'lr': 0.00042768618123185703, 'samples': 19844096, 'steps': 38757, 'loss/train': 1.149498701095581} +03/05/2022 11:01:23 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 11:01:27 - INFO - codeparrot_training - Step 38758: {'lr': 0.00042768244816238863, 'samples': 19844608, 'steps': 38758, 'loss/train': 1.8293123245239258} +03/05/2022 11:01:31 - INFO - codeparrot_training - Step 38759: {'lr': 0.00042767871501285916, 'samples': 19845120, 'steps': 38759, 'loss/train': 1.8742057085037231} +03/05/2022 11:01:31 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 11:01:36 - INFO - codeparrot_training - Step 38760: {'lr': 0.00042767498178327047, 'samples': 19845632, 'steps': 38760, 'loss/train': 0.9085041284561157} +03/05/2022 11:01:39 - INFO - codeparrot_training - Step 38761: {'lr': 0.00042767124847362413, 'samples': 19846144, 'steps': 38761, 'loss/train': 0.607943594455719} +03/05/2022 11:01:39 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/05/2022 11:01:44 - INFO - codeparrot_training - Step 38762: {'lr': 0.00042766751508392187, 'samples': 19846656, 'steps': 38762, 'loss/train': 1.7551029920578003} +03/05/2022 11:01:47 - INFO - codeparrot_training - Step 38763: {'lr': 0.00042766378161416543, 'samples': 19847168, 'steps': 38763, 'loss/train': 2.536116123199463} +03/05/2022 11:01:48 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/05/2022 11:01:53 - INFO - codeparrot_training - Step 38764: {'lr': 0.00042766004806435643, 'samples': 19847680, 'steps': 38764, 'loss/train': 1.4861551523208618} +03/05/2022 11:01:56 - INFO - codeparrot_training - Step 38765: {'lr': 0.0004276563144344965, 'samples': 19848192, 'steps': 38765, 'loss/train': 1.4665184020996094} +03/05/2022 11:01:56 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/05/2022 11:02:01 - INFO - codeparrot_training - Step 38766: {'lr': 0.00042765258072458733, 'samples': 19848704, 'steps': 38766, 'loss/train': 1.761582374572754} +03/05/2022 11:02:04 - INFO - codeparrot_training - Step 38767: {'lr': 0.00042764884693463075, 'samples': 19849216, 'steps': 38767, 'loss/train': 1.673612117767334} +03/05/2022 11:02:04 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/05/2022 11:02:09 - INFO - codeparrot_training - Step 38768: {'lr': 0.0004276451130646283, 'samples': 19849728, 'steps': 38768, 'loss/train': 2.0063512325286865} +03/05/2022 11:02:13 - INFO - codeparrot_training - Step 38769: {'lr': 0.0004276413791145817, 'samples': 19850240, 'steps': 38769, 'loss/train': 1.526882290840149} +03/05/2022 11:02:13 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 11:02:18 - INFO - codeparrot_training - Step 38770: {'lr': 0.00042763764508449263, 'samples': 19850752, 'steps': 38770, 'loss/train': 1.1674095392227173} +03/05/2022 11:02:21 - INFO - codeparrot_training - Step 38771: {'lr': 0.0004276339109743628, 'samples': 19851264, 'steps': 38771, 'loss/train': 2.281853437423706} +03/05/2022 11:02:21 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/05/2022 11:02:26 - INFO - codeparrot_training - Step 38772: {'lr': 0.0004276301767841939, 'samples': 19851776, 'steps': 38772, 'loss/train': 1.3355110883712769} +03/05/2022 11:02:30 - INFO - codeparrot_training - Step 38773: {'lr': 0.00042762644251398755, 'samples': 19852288, 'steps': 38773, 'loss/train': 1.3679935932159424} +03/05/2022 11:02:30 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 11:02:35 - INFO - codeparrot_training - Step 38774: {'lr': 0.0004276227081637454, 'samples': 19852800, 'steps': 38774, 'loss/train': 2.0201175212860107} +03/05/2022 11:02:38 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 11:02:40 - INFO - codeparrot_training - Step 38775: {'lr': 0.00042761897373346923, 'samples': 19853312, 'steps': 38775, 'loss/train': 1.4436979293823242} +03/05/2022 11:02:43 - INFO - codeparrot_training - Step 38776: {'lr': 0.0004276152392231608, 'samples': 19853824, 'steps': 38776, 'loss/train': 1.448146104812622} +03/05/2022 11:02:46 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 11:02:49 - INFO - codeparrot_training - Step 38777: {'lr': 0.00042761150463282164, 'samples': 19854336, 'steps': 38777, 'loss/train': 0.18870750069618225} +03/05/2022 11:02:52 - INFO - codeparrot_training - Step 38778: {'lr': 0.0004276077699624534, 'samples': 19854848, 'steps': 38778, 'loss/train': 1.9507322311401367} +03/05/2022 11:02:55 - INFO - codeparrot_training - Step 38779: {'lr': 0.0004276040352120578, 'samples': 19855360, 'steps': 38779, 'loss/train': 1.7470334768295288} +03/05/2022 11:02:55 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/05/2022 11:03:00 - INFO - codeparrot_training - Step 38780: {'lr': 0.0004276003003816367, 'samples': 19855872, 'steps': 38780, 'loss/train': 1.4377894401550293} +03/05/2022 11:03:04 - INFO - codeparrot_training - Step 38781: {'lr': 0.0004275965654711916, 'samples': 19856384, 'steps': 38781, 'loss/train': 1.9451509714126587} +03/05/2022 11:03:04 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 11:03:09 - INFO - codeparrot_training - Step 38782: {'lr': 0.0004275928304807242, 'samples': 19856896, 'steps': 38782, 'loss/train': 2.2601380348205566} +03/05/2022 11:03:12 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 11:03:14 - INFO - codeparrot_training - Step 38783: {'lr': 0.0004275890954102362, 'samples': 19857408, 'steps': 38783, 'loss/train': 1.978162169456482} +03/05/2022 11:03:17 - INFO - codeparrot_training - Step 38784: {'lr': 0.0004275853602597294, 'samples': 19857920, 'steps': 38784, 'loss/train': 1.5639010667800903} +03/05/2022 11:03:20 - INFO - codeparrot_training - Step 38785: {'lr': 0.00042758162502920527, 'samples': 19858432, 'steps': 38785, 'loss/train': 4.148996353149414} +03/05/2022 11:03:20 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/05/2022 11:03:26 - INFO - codeparrot_training - Step 38786: {'lr': 0.0004275778897186656, 'samples': 19858944, 'steps': 38786, 'loss/train': 1.1092439889907837} +03/05/2022 11:03:29 - INFO - codeparrot_training - Step 38787: {'lr': 0.0004275741543281121, 'samples': 19859456, 'steps': 38787, 'loss/train': 1.6421773433685303} +03/05/2022 11:03:29 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/05/2022 11:03:34 - INFO - codeparrot_training - Step 38788: {'lr': 0.0004275704188575464, 'samples': 19859968, 'steps': 38788, 'loss/train': 1.2379320859909058} +03/05/2022 11:03:38 - INFO - codeparrot_training - Step 38789: {'lr': 0.00042756668330697024, 'samples': 19860480, 'steps': 38789, 'loss/train': 1.6653393507003784} +03/05/2022 11:03:38 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 11:03:43 - INFO - codeparrot_training - Step 38790: {'lr': 0.00042756294767638527, 'samples': 19860992, 'steps': 38790, 'loss/train': 1.7198596000671387} +03/05/2022 11:03:46 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/05/2022 11:03:48 - INFO - codeparrot_training - Step 38791: {'lr': 0.00042755921196579316, 'samples': 19861504, 'steps': 38791, 'loss/train': 1.2671146392822266} +03/05/2022 11:03:51 - INFO - codeparrot_training - Step 38792: {'lr': 0.0004275554761751956, 'samples': 19862016, 'steps': 38792, 'loss/train': 2.116487979888916} +03/05/2022 11:03:54 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/05/2022 11:03:57 - INFO - codeparrot_training - Step 38793: {'lr': 0.0004275517403045943, 'samples': 19862528, 'steps': 38793, 'loss/train': 2.010054588317871} +03/05/2022 11:04:00 - INFO - codeparrot_training - Step 38794: {'lr': 0.000427548004353991, 'samples': 19863040, 'steps': 38794, 'loss/train': 1.5263394117355347} +03/05/2022 11:04:03 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/05/2022 11:04:05 - INFO - codeparrot_training - Step 38795: {'lr': 0.00042754426832338724, 'samples': 19863552, 'steps': 38795, 'loss/train': 3.899728298187256} +03/05/2022 11:04:08 - INFO - codeparrot_training - Step 38796: {'lr': 0.00042754053221278476, 'samples': 19864064, 'steps': 38796, 'loss/train': 2.0442090034484863} +03/05/2022 11:04:11 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/05/2022 11:04:13 - INFO - codeparrot_training - Step 38797: {'lr': 0.0004275367960221853, 'samples': 19864576, 'steps': 38797, 'loss/train': 1.4188014268875122} +03/05/2022 11:04:17 - INFO - codeparrot_training - Step 38798: {'lr': 0.0004275330597515904, 'samples': 19865088, 'steps': 38798, 'loss/train': 1.745062232017517} +03/05/2022 11:04:19 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 11:04:22 - INFO - codeparrot_training - Step 38799: {'lr': 0.00042752932340100195, 'samples': 19865600, 'steps': 38799, 'loss/train': 1.4455066919326782} +03/05/2022 11:04:25 - INFO - codeparrot_training - Step 38800: {'lr': 0.00042752558697042143, 'samples': 19866112, 'steps': 38800, 'loss/train': 1.895833134651184} +03/05/2022 11:04:27 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/05/2022 11:04:30 - INFO - codeparrot_training - Step 38801: {'lr': 0.0004275218504598507, 'samples': 19866624, 'steps': 38801, 'loss/train': 1.1686111688613892} +03/05/2022 11:04:33 - INFO - codeparrot_training - Step 38802: {'lr': 0.0004275181138692914, 'samples': 19867136, 'steps': 38802, 'loss/train': 1.7747619152069092} +03/05/2022 11:04:36 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 11:04:39 - INFO - codeparrot_training - Step 38803: {'lr': 0.0004275143771987451, 'samples': 19867648, 'steps': 38803, 'loss/train': 1.614460825920105} +03/05/2022 11:04:42 - INFO - codeparrot_training - Step 38804: {'lr': 0.00042751064044821354, 'samples': 19868160, 'steps': 38804, 'loss/train': 1.4653425216674805} +03/05/2022 11:04:45 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/05/2022 11:04:47 - INFO - codeparrot_training - Step 38805: {'lr': 0.0004275069036176985, 'samples': 19868672, 'steps': 38805, 'loss/train': 1.609207034111023} +03/05/2022 11:04:50 - INFO - codeparrot_training - Step 38806: {'lr': 0.0004275031667072015, 'samples': 19869184, 'steps': 38806, 'loss/train': 0.9228640794754028} +03/05/2022 11:04:53 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/05/2022 11:04:56 - INFO - codeparrot_training - Step 38807: {'lr': 0.0004274994297167244, 'samples': 19869696, 'steps': 38807, 'loss/train': 1.5836750268936157} +03/05/2022 11:04:59 - INFO - codeparrot_training - Step 38808: {'lr': 0.00042749569264626875, 'samples': 19870208, 'steps': 38808, 'loss/train': 1.6864134073257446} +03/05/2022 11:05:01 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/05/2022 11:05:04 - INFO - codeparrot_training - Step 38809: {'lr': 0.0004274919554958363, 'samples': 19870720, 'steps': 38809, 'loss/train': 2.057582139968872} +03/05/2022 11:05:07 - INFO - codeparrot_training - Step 38810: {'lr': 0.00042748821826542875, 'samples': 19871232, 'steps': 38810, 'loss/train': 1.1315243244171143} +03/05/2022 11:05:10 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 11:05:13 - INFO - codeparrot_training - Step 38811: {'lr': 0.00042748448095504765, 'samples': 19871744, 'steps': 38811, 'loss/train': 1.936607003211975} +03/05/2022 11:05:16 - INFO - codeparrot_training - Step 38812: {'lr': 0.0004274807435646948, 'samples': 19872256, 'steps': 38812, 'loss/train': 1.7052066326141357} +03/05/2022 11:05:18 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 11:05:21 - INFO - codeparrot_training - Step 38813: {'lr': 0.0004274770060943719, 'samples': 19872768, 'steps': 38813, 'loss/train': 1.3649488687515259} +03/05/2022 11:05:24 - INFO - codeparrot_training - Step 38814: {'lr': 0.00042747326854408063, 'samples': 19873280, 'steps': 38814, 'loss/train': 1.6063501834869385} +03/05/2022 11:05:27 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/05/2022 11:05:29 - INFO - codeparrot_training - Step 38815: {'lr': 0.00042746953091382254, 'samples': 19873792, 'steps': 38815, 'loss/train': 2.1427087783813477} +03/05/2022 11:05:33 - INFO - codeparrot_training - Step 38816: {'lr': 0.00042746579320359956, 'samples': 19874304, 'steps': 38816, 'loss/train': 2.024057388305664} +03/05/2022 11:05:36 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/05/2022 11:05:38 - INFO - codeparrot_training - Step 38817: {'lr': 0.00042746205541341315, 'samples': 19874816, 'steps': 38817, 'loss/train': 0.9468193054199219} +03/05/2022 11:05:41 - INFO - codeparrot_training - Step 38818: {'lr': 0.0004274583175432651, 'samples': 19875328, 'steps': 38818, 'loss/train': 1.0230181217193604} +03/05/2022 11:05:44 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/05/2022 11:05:46 - INFO - codeparrot_training - Step 38819: {'lr': 0.000427454579593157, 'samples': 19875840, 'steps': 38819, 'loss/train': 1.914709210395813} +03/05/2022 11:05:50 - INFO - codeparrot_training - Step 38820: {'lr': 0.00042745084156309065, 'samples': 19876352, 'steps': 38820, 'loss/train': 1.675291895866394} +03/05/2022 11:05:52 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 11:05:55 - INFO - codeparrot_training - Step 38821: {'lr': 0.00042744710345306774, 'samples': 19876864, 'steps': 38821, 'loss/train': 2.1141064167022705} +03/05/2022 11:05:58 - INFO - codeparrot_training - Step 38822: {'lr': 0.00042744336526308986, 'samples': 19877376, 'steps': 38822, 'loss/train': 1.5119497776031494} +03/05/2022 11:06:01 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 11:06:03 - INFO - codeparrot_training - Step 38823: {'lr': 0.0004274396269931587, 'samples': 19877888, 'steps': 38823, 'loss/train': 2.0717127323150635} +03/05/2022 11:06:07 - INFO - codeparrot_training - Step 38824: {'lr': 0.0004274358886432761, 'samples': 19878400, 'steps': 38824, 'loss/train': 1.378435730934143} +03/05/2022 11:06:09 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 11:06:12 - INFO - codeparrot_training - Step 38825: {'lr': 0.0004274321502134435, 'samples': 19878912, 'steps': 38825, 'loss/train': 1.9915697574615479} +03/05/2022 11:06:15 - INFO - codeparrot_training - Step 38826: {'lr': 0.00042742841170366274, 'samples': 19879424, 'steps': 38826, 'loss/train': 1.9072010517120361} +03/05/2022 11:06:18 - INFO - codeparrot_training - Step 38827: {'lr': 0.0004274246731139355, 'samples': 19879936, 'steps': 38827, 'loss/train': 0.8838217854499817} +03/05/2022 11:06:18 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/05/2022 11:06:24 - INFO - codeparrot_training - Step 38828: {'lr': 0.0004274209344442634, 'samples': 19880448, 'steps': 38828, 'loss/train': 1.9953337907791138} +03/05/2022 11:06:27 - INFO - codeparrot_training - Step 38829: {'lr': 0.00042741719569464834, 'samples': 19880960, 'steps': 38829, 'loss/train': 1.7629108428955078} +03/05/2022 11:06:27 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/05/2022 11:06:32 - INFO - codeparrot_training - Step 38830: {'lr': 0.0004274134568650916, 'samples': 19881472, 'steps': 38830, 'loss/train': 2.0384504795074463} +03/05/2022 11:06:35 - INFO - codeparrot_training - Step 38831: {'lr': 0.00042740971795559527, 'samples': 19881984, 'steps': 38831, 'loss/train': 1.5537960529327393} +03/05/2022 11:06:35 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 11:06:41 - INFO - codeparrot_training - Step 38832: {'lr': 0.00042740597896616075, 'samples': 19882496, 'steps': 38832, 'loss/train': 0.09074151515960693} +03/05/2022 11:06:44 - INFO - codeparrot_training - Step 38833: {'lr': 0.00042740223989678984, 'samples': 19883008, 'steps': 38833, 'loss/train': 1.9083868265151978} +03/05/2022 11:06:44 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/05/2022 11:06:49 - INFO - codeparrot_training - Step 38834: {'lr': 0.0004273985007474842, 'samples': 19883520, 'steps': 38834, 'loss/train': 2.390120029449463} +03/05/2022 11:06:52 - INFO - codeparrot_training - Step 38835: {'lr': 0.00042739476151824565, 'samples': 19884032, 'steps': 38835, 'loss/train': 1.0216453075408936} +03/05/2022 11:06:52 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/05/2022 11:06:58 - INFO - codeparrot_training - Step 38836: {'lr': 0.00042739102220907567, 'samples': 19884544, 'steps': 38836, 'loss/train': 1.9429576396942139} +03/05/2022 11:07:00 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 11:07:04 - INFO - codeparrot_training - Step 38837: {'lr': 0.000427387282819976, 'samples': 19885056, 'steps': 38837, 'loss/train': 1.8225210905075073} +03/05/2022 11:07:07 - INFO - codeparrot_training - Step 38838: {'lr': 0.0004273835433509484, 'samples': 19885568, 'steps': 38838, 'loss/train': 2.1098668575286865} +03/05/2022 11:07:10 - INFO - codeparrot_training - Step 38839: {'lr': 0.0004273798038019945, 'samples': 19886080, 'steps': 38839, 'loss/train': 2.106077194213867} +03/05/2022 11:07:13 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/05/2022 11:07:15 - INFO - codeparrot_training - Step 38840: {'lr': 0.000427376064173116, 'samples': 19886592, 'steps': 38840, 'loss/train': 1.0053428411483765} +03/05/2022 11:07:18 - INFO - codeparrot_training - Step 38841: {'lr': 0.0004273723244643146, 'samples': 19887104, 'steps': 38841, 'loss/train': 1.7379900217056274} +03/05/2022 11:07:21 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/05/2022 11:07:24 - INFO - codeparrot_training - Step 38842: {'lr': 0.000427368584675592, 'samples': 19887616, 'steps': 38842, 'loss/train': 0.8685247302055359} +03/05/2022 11:07:27 - INFO - codeparrot_training - Step 38843: {'lr': 0.0004273648448069498, 'samples': 19888128, 'steps': 38843, 'loss/train': 1.1872957944869995} +03/05/2022 11:07:30 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/05/2022 11:07:32 - INFO - codeparrot_training - Step 38844: {'lr': 0.00042736110485838973, 'samples': 19888640, 'steps': 38844, 'loss/train': 0.15888711810112} +03/05/2022 11:07:36 - INFO - codeparrot_training - Step 38845: {'lr': 0.0004273573648299135, 'samples': 19889152, 'steps': 38845, 'loss/train': 0.06848172843456268} +03/05/2022 11:07:38 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/05/2022 11:07:41 - INFO - codeparrot_training - Step 38846: {'lr': 0.0004273536247215227, 'samples': 19889664, 'steps': 38846, 'loss/train': 1.6705498695373535} +03/05/2022 11:07:44 - INFO - codeparrot_training - Step 38847: {'lr': 0.00042734988453321923, 'samples': 19890176, 'steps': 38847, 'loss/train': 2.401719808578491} +03/05/2022 11:07:47 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 11:07:49 - INFO - codeparrot_training - Step 38848: {'lr': 0.0004273461442650046, 'samples': 19890688, 'steps': 38848, 'loss/train': 2.2269763946533203} +03/05/2022 11:07:53 - INFO - codeparrot_training - Step 38849: {'lr': 0.0004273424039168805, 'samples': 19891200, 'steps': 38849, 'loss/train': 1.3912190198898315} +03/05/2022 11:07:55 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/05/2022 11:07:58 - INFO - codeparrot_training - Step 38850: {'lr': 0.00042733866348884864, 'samples': 19891712, 'steps': 38850, 'loss/train': 1.7977155447006226} +03/05/2022 11:08:01 - INFO - codeparrot_training - Step 38851: {'lr': 0.0004273349229809108, 'samples': 19892224, 'steps': 38851, 'loss/train': 1.1809056997299194} +03/05/2022 11:08:03 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/05/2022 11:08:06 - INFO - codeparrot_training - Step 38852: {'lr': 0.00042733118239306845, 'samples': 19892736, 'steps': 38852, 'loss/train': 1.5782252550125122} +03/05/2022 11:08:09 - INFO - codeparrot_training - Step 38853: {'lr': 0.0004273274417253235, 'samples': 19893248, 'steps': 38853, 'loss/train': 1.4937671422958374} +03/05/2022 11:08:12 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 11:08:15 - INFO - codeparrot_training - Step 38854: {'lr': 0.00042732370097767756, 'samples': 19893760, 'steps': 38854, 'loss/train': 1.1941438913345337} +03/05/2022 11:08:18 - INFO - codeparrot_training - Step 38855: {'lr': 0.0004273199601501322, 'samples': 19894272, 'steps': 38855, 'loss/train': 1.5130765438079834} +03/05/2022 11:08:20 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 11:08:23 - INFO - codeparrot_training - Step 38856: {'lr': 0.0004273162192426893, 'samples': 19894784, 'steps': 38856, 'loss/train': 1.9100626707077026} +03/05/2022 11:08:26 - INFO - codeparrot_training - Step 38857: {'lr': 0.00042731247825535037, 'samples': 19895296, 'steps': 38857, 'loss/train': 0.5294350385665894} +03/05/2022 11:08:28 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/05/2022 11:08:32 - INFO - codeparrot_training - Step 38858: {'lr': 0.00042730873718811724, 'samples': 19895808, 'steps': 38858, 'loss/train': 1.096190333366394} +03/05/2022 11:08:35 - INFO - codeparrot_training - Step 38859: {'lr': 0.0004273049960409915, 'samples': 19896320, 'steps': 38859, 'loss/train': 2.002255916595459} +03/05/2022 11:08:37 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/05/2022 11:08:40 - INFO - codeparrot_training - Step 38860: {'lr': 0.00042730125481397487, 'samples': 19896832, 'steps': 38860, 'loss/train': 2.0559184551239014} +03/05/2022 11:08:43 - INFO - codeparrot_training - Step 38861: {'lr': 0.00042729751350706905, 'samples': 19897344, 'steps': 38861, 'loss/train': 1.495954155921936} +03/05/2022 11:08:45 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/05/2022 11:08:48 - INFO - codeparrot_training - Step 38862: {'lr': 0.00042729377212027557, 'samples': 19897856, 'steps': 38862, 'loss/train': 2.0471136569976807} +03/05/2022 11:08:52 - INFO - codeparrot_training - Step 38863: {'lr': 0.0004272900306535964, 'samples': 19898368, 'steps': 38863, 'loss/train': 0.8550220727920532} +03/05/2022 11:08:54 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/05/2022 11:08:57 - INFO - codeparrot_training - Step 38864: {'lr': 0.00042728628910703305, 'samples': 19898880, 'steps': 38864, 'loss/train': 2.3690879344940186} +03/05/2022 11:09:00 - INFO - codeparrot_training - Step 38865: {'lr': 0.0004272825474805872, 'samples': 19899392, 'steps': 38865, 'loss/train': 1.5645257234573364} +03/05/2022 11:09:03 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 11:09:05 - INFO - codeparrot_training - Step 38866: {'lr': 0.0004272788057742606, 'samples': 19899904, 'steps': 38866, 'loss/train': 1.7530213594436646} +03/05/2022 11:09:08 - INFO - codeparrot_training - Step 38867: {'lr': 0.0004272750639880549, 'samples': 19900416, 'steps': 38867, 'loss/train': 1.3539156913757324} +03/05/2022 11:09:11 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/05/2022 11:09:14 - INFO - codeparrot_training - Step 38868: {'lr': 0.0004272713221219718, 'samples': 19900928, 'steps': 38868, 'loss/train': 2.2932496070861816} +03/05/2022 11:09:17 - INFO - codeparrot_training - Step 38869: {'lr': 0.00042726758017601297, 'samples': 19901440, 'steps': 38869, 'loss/train': 2.219136953353882} +03/05/2022 11:09:19 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/05/2022 11:09:22 - INFO - codeparrot_training - Step 38870: {'lr': 0.00042726383815018006, 'samples': 19901952, 'steps': 38870, 'loss/train': 1.5567244291305542} +03/05/2022 11:09:25 - INFO - codeparrot_training - Step 38871: {'lr': 0.00042726009604447484, 'samples': 19902464, 'steps': 38871, 'loss/train': 2.2527847290039062} +03/05/2022 11:09:28 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/05/2022 11:09:31 - INFO - codeparrot_training - Step 38872: {'lr': 0.00042725635385889893, 'samples': 19902976, 'steps': 38872, 'loss/train': 1.7845951318740845} +03/05/2022 11:09:34 - INFO - codeparrot_training - Step 38873: {'lr': 0.0004272526115934541, 'samples': 19903488, 'steps': 38873, 'loss/train': 1.5938351154327393} +03/05/2022 11:09:36 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 11:09:39 - INFO - codeparrot_training - Step 38874: {'lr': 0.0004272488692481419, 'samples': 19904000, 'steps': 38874, 'loss/train': 2.0254082679748535} +03/05/2022 11:09:42 - INFO - codeparrot_training - Step 38875: {'lr': 0.00042724512682296416, 'samples': 19904512, 'steps': 38875, 'loss/train': 0.8528100252151489} +03/05/2022 11:09:44 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 11:09:47 - INFO - codeparrot_training - Step 38876: {'lr': 0.00042724138431792245, 'samples': 19905024, 'steps': 38876, 'loss/train': 1.939806580543518} +03/05/2022 11:09:50 - INFO - codeparrot_training - Step 38877: {'lr': 0.0004272376417330186, 'samples': 19905536, 'steps': 38877, 'loss/train': 1.611383080482483} +03/05/2022 11:09:52 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/05/2022 11:09:56 - INFO - codeparrot_training - Step 38878: {'lr': 0.00042723389906825415, 'samples': 19906048, 'steps': 38878, 'loss/train': 1.7664685249328613} +03/05/2022 11:09:59 - INFO - codeparrot_training - Step 38879: {'lr': 0.0004272301563236308, 'samples': 19906560, 'steps': 38879, 'loss/train': 1.7835584878921509} +03/05/2022 11:10:01 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 11:10:04 - INFO - codeparrot_training - Step 38880: {'lr': 0.0004272264134991503, 'samples': 19907072, 'steps': 38880, 'loss/train': 2.6062960624694824} +03/05/2022 11:10:07 - INFO - codeparrot_training - Step 38881: {'lr': 0.0004272226705948143, 'samples': 19907584, 'steps': 38881, 'loss/train': 1.4331471920013428} +03/05/2022 11:10:09 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/05/2022 11:10:13 - INFO - codeparrot_training - Step 38882: {'lr': 0.00042721892761062453, 'samples': 19908096, 'steps': 38882, 'loss/train': 1.6260648965835571} +03/05/2022 11:10:16 - INFO - codeparrot_training - Step 38883: {'lr': 0.00042721518454658265, 'samples': 19908608, 'steps': 38883, 'loss/train': 2.2704081535339355} +03/05/2022 11:10:18 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/05/2022 11:10:21 - INFO - codeparrot_training - Step 38884: {'lr': 0.0004272114414026903, 'samples': 19909120, 'steps': 38884, 'loss/train': 0.8609506487846375} +03/05/2022 11:10:24 - INFO - codeparrot_training - Step 38885: {'lr': 0.00042720769817894926, 'samples': 19909632, 'steps': 38885, 'loss/train': 1.3536765575408936} +03/05/2022 11:10:26 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/05/2022 11:10:29 - INFO - codeparrot_training - Step 38886: {'lr': 0.00042720395487536115, 'samples': 19910144, 'steps': 38886, 'loss/train': 1.5734316110610962} +03/05/2022 11:10:33 - INFO - codeparrot_training - Step 38887: {'lr': 0.0004272002114919277, 'samples': 19910656, 'steps': 38887, 'loss/train': 1.7789583206176758} +03/05/2022 11:10:34 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/05/2022 11:10:38 - INFO - codeparrot_training - Step 38888: {'lr': 0.0004271964680286505, 'samples': 19911168, 'steps': 38888, 'loss/train': 1.5227296352386475} +03/05/2022 11:10:41 - INFO - codeparrot_training - Step 38889: {'lr': 0.00042719272448553137, 'samples': 19911680, 'steps': 38889, 'loss/train': 2.528425931930542} +03/05/2022 11:10:43 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/05/2022 11:10:47 - INFO - codeparrot_training - Step 38890: {'lr': 0.00042718898086257183, 'samples': 19912192, 'steps': 38890, 'loss/train': 1.3084510564804077} +03/05/2022 11:10:50 - INFO - codeparrot_training - Step 38891: {'lr': 0.0004271852371597738, 'samples': 19912704, 'steps': 38891, 'loss/train': 2.1403250694274902} +03/05/2022 11:10:53 - INFO - codeparrot_training - Step 38892: {'lr': 0.00042718149337713873, 'samples': 19913216, 'steps': 38892, 'loss/train': 6.25106143951416} +03/05/2022 11:10:56 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 11:10:59 - INFO - codeparrot_training - Step 38893: {'lr': 0.0004271777495146685, 'samples': 19913728, 'steps': 38893, 'loss/train': 2.2443454265594482} +03/05/2022 11:11:02 - INFO - codeparrot_training - Step 38894: {'lr': 0.00042717400557236467, 'samples': 19914240, 'steps': 38894, 'loss/train': 1.8351449966430664} +03/05/2022 11:11:05 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 11:11:07 - INFO - codeparrot_training - Step 38895: {'lr': 0.000427170261550229, 'samples': 19914752, 'steps': 38895, 'loss/train': 1.4891761541366577} +03/05/2022 11:11:11 - INFO - codeparrot_training - Step 38896: {'lr': 0.0004271665174482631, 'samples': 19915264, 'steps': 38896, 'loss/train': 1.907853364944458} +03/05/2022 11:11:13 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/05/2022 11:11:16 - INFO - codeparrot_training - Step 38897: {'lr': 0.0004271627732664687, 'samples': 19915776, 'steps': 38897, 'loss/train': 1.8562076091766357} +03/05/2022 11:11:19 - INFO - codeparrot_training - Step 38898: {'lr': 0.0004271590290048475, 'samples': 19916288, 'steps': 38898, 'loss/train': 1.7378627061843872} +03/05/2022 11:11:22 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/05/2022 11:11:24 - INFO - codeparrot_training - Step 38899: {'lr': 0.00042715528466340117, 'samples': 19916800, 'steps': 38899, 'loss/train': 1.1246466636657715} +03/05/2022 11:11:28 - INFO - codeparrot_training - Step 38900: {'lr': 0.00042715154024213143, 'samples': 19917312, 'steps': 38900, 'loss/train': 1.6148508787155151} +03/05/2022 11:11:31 - INFO - codeparrot_training - Step 38901: {'lr': 0.0004271477957410399, 'samples': 19917824, 'steps': 38901, 'loss/train': 1.4736651182174683} +03/05/2022 11:11:31 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/05/2022 11:11:36 - INFO - codeparrot_training - Step 38902: {'lr': 0.00042714405116012834, 'samples': 19918336, 'steps': 38902, 'loss/train': 2.015329122543335} +03/05/2022 11:11:39 - INFO - codeparrot_training - Step 38903: {'lr': 0.0004271403064993984, 'samples': 19918848, 'steps': 38903, 'loss/train': 2.12369704246521} +03/05/2022 11:11:39 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/05/2022 11:11:45 - INFO - codeparrot_training - Step 38904: {'lr': 0.00042713656175885173, 'samples': 19919360, 'steps': 38904, 'loss/train': 1.616765022277832} +03/05/2022 11:11:48 - INFO - codeparrot_training - Step 38905: {'lr': 0.00042713281693849015, 'samples': 19919872, 'steps': 38905, 'loss/train': 2.083192825317383} +03/05/2022 11:11:48 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/05/2022 11:11:53 - INFO - codeparrot_training - Step 38906: {'lr': 0.0004271290720383152, 'samples': 19920384, 'steps': 38906, 'loss/train': 2.3888847827911377} +03/05/2022 11:11:56 - INFO - codeparrot_training - Step 38907: {'lr': 0.00042712532705832865, 'samples': 19920896, 'steps': 38907, 'loss/train': 2.012119770050049} +03/05/2022 11:11:56 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 11:12:02 - INFO - codeparrot_training - Step 38908: {'lr': 0.0004271215819985321, 'samples': 19921408, 'steps': 38908, 'loss/train': 1.7760659456253052} +03/05/2022 11:12:05 - INFO - codeparrot_training - Step 38909: {'lr': 0.0004271178368589273, 'samples': 19921920, 'steps': 38909, 'loss/train': 1.8998491764068604} +03/05/2022 11:12:05 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 11:12:10 - INFO - codeparrot_training - Step 38910: {'lr': 0.000427114091639516, 'samples': 19922432, 'steps': 38910, 'loss/train': 0.3795957565307617} +03/05/2022 11:12:13 - INFO - codeparrot_training - Step 38911: {'lr': 0.0004271103463402998, 'samples': 19922944, 'steps': 38911, 'loss/train': 0.9166125059127808} +03/05/2022 11:12:13 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 11:12:18 - INFO - codeparrot_training - Step 38912: {'lr': 0.0004271066009612804, 'samples': 19923456, 'steps': 38912, 'loss/train': 2.433647871017456} +03/05/2022 11:12:22 - INFO - codeparrot_training - Step 38913: {'lr': 0.0004271028555024594, 'samples': 19923968, 'steps': 38913, 'loss/train': 2.044684410095215} +03/05/2022 11:12:22 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 11:12:27 - INFO - codeparrot_training - Step 38914: {'lr': 0.0004270991099638387, 'samples': 19924480, 'steps': 38914, 'loss/train': 1.634007453918457} +03/05/2022 11:12:30 - INFO - codeparrot_training - Step 38915: {'lr': 0.0004270953643454199, 'samples': 19924992, 'steps': 38915, 'loss/train': 1.565908432006836} +03/05/2022 11:12:31 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/05/2022 11:12:36 - INFO - codeparrot_training - Step 38916: {'lr': 0.0004270916186472046, 'samples': 19925504, 'steps': 38916, 'loss/train': 2.8455967903137207} +03/05/2022 11:12:39 - INFO - codeparrot_training - Step 38917: {'lr': 0.0004270878728691946, 'samples': 19926016, 'steps': 38917, 'loss/train': 1.6197147369384766} +03/05/2022 11:12:44 - INFO - codeparrot_training - Step 38918: {'lr': 0.00042708412701139147, 'samples': 19926528, 'steps': 38918, 'loss/train': 2.579059362411499} +03/05/2022 11:12:47 - INFO - codeparrot_training - Step 38919: {'lr': 0.000427080381073797, 'samples': 19927040, 'steps': 38919, 'loss/train': 1.97496497631073} +03/05/2022 11:12:48 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/05/2022 11:12:52 - INFO - codeparrot_training - Step 38920: {'lr': 0.00042707663505641287, 'samples': 19927552, 'steps': 38920, 'loss/train': 2.032160997390747} +03/05/2022 11:12:56 - INFO - codeparrot_training - Step 38921: {'lr': 0.00042707288895924066, 'samples': 19928064, 'steps': 38921, 'loss/train': 1.2271091938018799} +03/05/2022 11:12:57 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 11:13:01 - INFO - codeparrot_training - Step 38922: {'lr': 0.0004270691427822823, 'samples': 19928576, 'steps': 38922, 'loss/train': 2.431943655014038} +03/05/2022 11:13:04 - INFO - codeparrot_training - Step 38923: {'lr': 0.0004270653965255391, 'samples': 19929088, 'steps': 38923, 'loss/train': 2.5251169204711914} +03/05/2022 11:13:05 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/05/2022 11:13:10 - INFO - codeparrot_training - Step 38924: {'lr': 0.0004270616501890131, 'samples': 19929600, 'steps': 38924, 'loss/train': 1.6960768699645996} +03/05/2022 11:13:13 - INFO - codeparrot_training - Step 38925: {'lr': 0.0004270579037727058, 'samples': 19930112, 'steps': 38925, 'loss/train': 2.3554043769836426} +03/05/2022 11:13:14 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/05/2022 11:13:18 - INFO - codeparrot_training - Step 38926: {'lr': 0.000427054157276619, 'samples': 19930624, 'steps': 38926, 'loss/train': 2.3110921382904053} +03/05/2022 11:13:21 - INFO - codeparrot_training - Step 38927: {'lr': 0.00042705041070075433, 'samples': 19931136, 'steps': 38927, 'loss/train': 0.9900491833686829} +03/05/2022 11:13:22 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 11:13:26 - INFO - codeparrot_training - Step 38928: {'lr': 0.00042704666404511343, 'samples': 19931648, 'steps': 38928, 'loss/train': 2.4232592582702637} +03/05/2022 11:13:30 - INFO - codeparrot_training - Step 38929: {'lr': 0.000427042917309698, 'samples': 19932160, 'steps': 38929, 'loss/train': 2.4030957221984863} +03/05/2022 11:13:30 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 11:13:35 - INFO - codeparrot_training - Step 38930: {'lr': 0.00042703917049450983, 'samples': 19932672, 'steps': 38930, 'loss/train': 1.1503387689590454} +03/05/2022 11:13:38 - INFO - codeparrot_training - Step 38931: {'lr': 0.0004270354235995505, 'samples': 19933184, 'steps': 38931, 'loss/train': 1.704493761062622} +03/05/2022 11:13:39 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 11:13:43 - INFO - codeparrot_training - Step 38932: {'lr': 0.0004270316766248218, 'samples': 19933696, 'steps': 38932, 'loss/train': 2.5988523960113525} +03/05/2022 11:13:46 - INFO - codeparrot_training - Step 38933: {'lr': 0.0004270279295703253, 'samples': 19934208, 'steps': 38933, 'loss/train': 1.9948276281356812} +03/05/2022 11:13:47 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 11:13:52 - INFO - codeparrot_training - Step 38934: {'lr': 0.00042702418243606275, 'samples': 19934720, 'steps': 38934, 'loss/train': 1.9496855735778809} +03/05/2022 11:13:55 - INFO - codeparrot_training - Step 38935: {'lr': 0.00042702043522203594, 'samples': 19935232, 'steps': 38935, 'loss/train': 1.516800045967102} +03/05/2022 11:13:55 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/05/2022 11:14:00 - INFO - codeparrot_training - Step 38936: {'lr': 0.00042701668792824633, 'samples': 19935744, 'steps': 38936, 'loss/train': 2.778306007385254} +03/05/2022 11:14:03 - INFO - codeparrot_training - Step 38937: {'lr': 0.00042701294055469576, 'samples': 19936256, 'steps': 38937, 'loss/train': 1.4200353622436523} +03/05/2022 11:14:04 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 11:14:08 - INFO - codeparrot_training - Step 38938: {'lr': 0.0004270091931013859, 'samples': 19936768, 'steps': 38938, 'loss/train': 1.2574478387832642} +03/05/2022 11:14:12 - INFO - codeparrot_training - Step 38939: {'lr': 0.00042700544556831846, 'samples': 19937280, 'steps': 38939, 'loss/train': 2.2507317066192627} +03/05/2022 11:14:12 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 11:14:17 - INFO - codeparrot_training - Step 38940: {'lr': 0.00042700169795549504, 'samples': 19937792, 'steps': 38940, 'loss/train': 1.9035097360610962} +03/05/2022 11:14:20 - INFO - codeparrot_training - Step 38941: {'lr': 0.00042699795026291743, 'samples': 19938304, 'steps': 38941, 'loss/train': 1.7899919748306274} +03/05/2022 11:14:20 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/05/2022 11:14:25 - INFO - codeparrot_training - Step 38942: {'lr': 0.0004269942024905872, 'samples': 19938816, 'steps': 38942, 'loss/train': 1.7130547761917114} +03/05/2022 11:14:28 - INFO - codeparrot_training - Step 38943: {'lr': 0.00042699045463850623, 'samples': 19939328, 'steps': 38943, 'loss/train': 1.6139165163040161} +03/05/2022 11:14:28 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/05/2022 11:14:34 - INFO - codeparrot_training - Step 38944: {'lr': 0.000426986706706676, 'samples': 19939840, 'steps': 38944, 'loss/train': 1.3511327505111694} +03/05/2022 11:14:37 - INFO - codeparrot_training - Step 38945: {'lr': 0.00042698295869509836, 'samples': 19940352, 'steps': 38945, 'loss/train': 1.525887131690979} +03/05/2022 11:14:37 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 11:14:42 - INFO - codeparrot_training - Step 38946: {'lr': 0.0004269792106037749, 'samples': 19940864, 'steps': 38946, 'loss/train': 1.5700898170471191} +03/05/2022 11:14:45 - INFO - codeparrot_training - Step 38947: {'lr': 0.0004269754624327073, 'samples': 19941376, 'steps': 38947, 'loss/train': 2.463099241256714} +03/05/2022 11:14:45 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 11:14:50 - INFO - codeparrot_training - Step 38948: {'lr': 0.0004269717141818973, 'samples': 19941888, 'steps': 38948, 'loss/train': 1.9206347465515137} +03/05/2022 11:14:54 - INFO - codeparrot_training - Step 38949: {'lr': 0.0004269679658513466, 'samples': 19942400, 'steps': 38949, 'loss/train': 1.4782088994979858} +03/05/2022 11:14:54 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/05/2022 11:14:59 - INFO - codeparrot_training - Step 38950: {'lr': 0.00042696421744105686, 'samples': 19942912, 'steps': 38950, 'loss/train': 1.3907102346420288} +03/05/2022 11:15:02 - INFO - codeparrot_training - Step 38951: {'lr': 0.0004269604689510298, 'samples': 19943424, 'steps': 38951, 'loss/train': 1.508575439453125} +03/05/2022 11:15:02 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/05/2022 11:15:08 - INFO - codeparrot_training - Step 38952: {'lr': 0.0004269567203812671, 'samples': 19943936, 'steps': 38952, 'loss/train': 2.0267443656921387} +03/05/2022 11:15:10 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/05/2022 11:15:13 - INFO - codeparrot_training - Step 38953: {'lr': 0.00042695297173177033, 'samples': 19944448, 'steps': 38953, 'loss/train': 1.5165950059890747} +03/05/2022 11:15:16 - INFO - codeparrot_training - Step 38954: {'lr': 0.0004269492230025413, 'samples': 19944960, 'steps': 38954, 'loss/train': 1.3401706218719482} +03/05/2022 11:15:19 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 11:15:21 - INFO - codeparrot_training - Step 38955: {'lr': 0.0004269454741935818, 'samples': 19945472, 'steps': 38955, 'loss/train': 2.094369888305664} +03/05/2022 11:15:24 - INFO - codeparrot_training - Step 38956: {'lr': 0.00042694172530489326, 'samples': 19945984, 'steps': 38956, 'loss/train': 0.9254721403121948} +03/05/2022 11:15:27 - INFO - codeparrot_training - Skipping example with length 996 (seq_length=1024) +03/05/2022 11:15:30 - INFO - codeparrot_training - Step 38957: {'lr': 0.00042693797633647755, 'samples': 19946496, 'steps': 38957, 'loss/train': 1.9181885719299316} +03/05/2022 11:15:33 - INFO - codeparrot_training - Step 38958: {'lr': 0.00042693422728833644, 'samples': 19947008, 'steps': 38958, 'loss/train': 1.57408607006073} +03/05/2022 11:15:35 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 11:15:38 - INFO - codeparrot_training - Step 38959: {'lr': 0.00042693047816047135, 'samples': 19947520, 'steps': 38959, 'loss/train': 1.039240837097168} +03/05/2022 11:15:41 - INFO - codeparrot_training - Step 38960: {'lr': 0.0004269267289528842, 'samples': 19948032, 'steps': 38960, 'loss/train': 1.9586362838745117} +03/05/2022 11:15:44 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 11:15:47 - INFO - codeparrot_training - Step 38961: {'lr': 0.00042692297966557657, 'samples': 19948544, 'steps': 38961, 'loss/train': 1.9032374620437622} +03/05/2022 11:15:50 - INFO - codeparrot_training - Step 38962: {'lr': 0.0004269192302985502, 'samples': 19949056, 'steps': 38962, 'loss/train': 1.4384634494781494} +03/05/2022 11:15:52 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/05/2022 11:15:55 - INFO - codeparrot_training - Step 38963: {'lr': 0.00042691548085180666, 'samples': 19949568, 'steps': 38963, 'loss/train': 2.0511012077331543} +03/05/2022 11:15:58 - INFO - codeparrot_training - Step 38964: {'lr': 0.00042691173132534775, 'samples': 19950080, 'steps': 38964, 'loss/train': 1.5592284202575684} +03/05/2022 11:16:00 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/05/2022 11:16:03 - INFO - codeparrot_training - Step 38965: {'lr': 0.0004269079817191752, 'samples': 19950592, 'steps': 38965, 'loss/train': 1.1896001100540161} +03/05/2022 11:16:07 - INFO - codeparrot_training - Step 38966: {'lr': 0.00042690423203329067, 'samples': 19951104, 'steps': 38966, 'loss/train': 1.00688898563385} +03/05/2022 11:16:09 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/05/2022 11:16:12 - INFO - codeparrot_training - Step 38967: {'lr': 0.0004269004822676958, 'samples': 19951616, 'steps': 38967, 'loss/train': 1.901790738105774} +03/05/2022 11:16:15 - INFO - codeparrot_training - Step 38968: {'lr': 0.0004268967324223922, 'samples': 19952128, 'steps': 38968, 'loss/train': 1.801607608795166} +03/05/2022 11:16:17 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 11:16:20 - INFO - codeparrot_training - Step 38969: {'lr': 0.00042689298249738185, 'samples': 19952640, 'steps': 38969, 'loss/train': 2.147698163986206} +03/05/2022 11:16:23 - INFO - codeparrot_training - Step 38970: {'lr': 0.00042688923249266614, 'samples': 19953152, 'steps': 38970, 'loss/train': 1.6063201427459717} +03/05/2022 11:16:27 - INFO - codeparrot_training - Step 38971: {'lr': 0.00042688548240824687, 'samples': 19953664, 'steps': 38971, 'loss/train': 2.185361862182617} +03/05/2022 11:16:27 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/05/2022 11:16:32 - INFO - codeparrot_training - Step 38972: {'lr': 0.00042688173224412573, 'samples': 19954176, 'steps': 38972, 'loss/train': 2.025792360305786} +03/05/2022 11:16:35 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 11:16:37 - INFO - codeparrot_training - Step 38973: {'lr': 0.00042687798200030446, 'samples': 19954688, 'steps': 38973, 'loss/train': 1.676720142364502} +03/05/2022 11:16:40 - INFO - codeparrot_training - Step 38974: {'lr': 0.00042687423167678463, 'samples': 19955200, 'steps': 38974, 'loss/train': 2.049980401992798} +03/05/2022 11:16:44 - INFO - codeparrot_training - Step 38975: {'lr': 0.0004268704812735681, 'samples': 19955712, 'steps': 38975, 'loss/train': 1.6502013206481934} +03/05/2022 11:16:44 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/05/2022 11:16:49 - INFO - codeparrot_training - Step 38976: {'lr': 0.00042686673079065637, 'samples': 19956224, 'steps': 38976, 'loss/train': 1.1977686882019043} +03/05/2022 11:16:52 - INFO - codeparrot_training - Step 38977: {'lr': 0.00042686298022805126, 'samples': 19956736, 'steps': 38977, 'loss/train': 0.8904712796211243} +03/05/2022 11:16:52 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/05/2022 11:16:57 - INFO - codeparrot_training - Step 38978: {'lr': 0.0004268592295857544, 'samples': 19957248, 'steps': 38978, 'loss/train': 1.970947504043579} +03/05/2022 11:17:00 - INFO - codeparrot_training - Step 38979: {'lr': 0.0004268554788637675, 'samples': 19957760, 'steps': 38979, 'loss/train': 1.9199877977371216} +03/05/2022 11:17:01 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/05/2022 11:17:06 - INFO - codeparrot_training - Step 38980: {'lr': 0.0004268517280620923, 'samples': 19958272, 'steps': 38980, 'loss/train': 0.5075897574424744} +03/05/2022 11:17:09 - INFO - codeparrot_training - Step 38981: {'lr': 0.0004268479771807303, 'samples': 19958784, 'steps': 38981, 'loss/train': 1.8086477518081665} +03/05/2022 11:17:09 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 11:17:14 - INFO - codeparrot_training - Step 38982: {'lr': 0.00042684422621968346, 'samples': 19959296, 'steps': 38982, 'loss/train': 1.1092634201049805} +03/05/2022 11:17:17 - INFO - codeparrot_training - Step 38983: {'lr': 0.0004268404751789533, 'samples': 19959808, 'steps': 38983, 'loss/train': 0.6658592820167542} +03/05/2022 11:17:17 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 11:17:23 - INFO - codeparrot_training - Step 38984: {'lr': 0.0004268367240585416, 'samples': 19960320, 'steps': 38984, 'loss/train': 1.5414124727249146} +03/05/2022 11:17:26 - INFO - codeparrot_training - Step 38985: {'lr': 0.0004268329728584499, 'samples': 19960832, 'steps': 38985, 'loss/train': 1.8811044692993164} +03/05/2022 11:17:26 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 11:17:31 - INFO - codeparrot_training - Step 38986: {'lr': 0.0004268292215786801, 'samples': 19961344, 'steps': 38986, 'loss/train': 1.8472638130187988} +03/05/2022 11:17:34 - INFO - codeparrot_training - Step 38987: {'lr': 0.0004268254702192337, 'samples': 19961856, 'steps': 38987, 'loss/train': 1.2741777896881104} +03/05/2022 11:17:34 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/05/2022 11:17:40 - INFO - codeparrot_training - Step 38988: {'lr': 0.00042682171878011255, 'samples': 19962368, 'steps': 38988, 'loss/train': 0.5593450665473938} +03/05/2022 11:17:43 - INFO - codeparrot_training - Step 38989: {'lr': 0.00042681796726131815, 'samples': 19962880, 'steps': 38989, 'loss/train': 0.12118187546730042} +03/05/2022 11:17:43 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 11:17:48 - INFO - codeparrot_training - Step 38990: {'lr': 0.0004268142156628524, 'samples': 19963392, 'steps': 38990, 'loss/train': 1.288893461227417} +03/05/2022 11:17:51 - INFO - codeparrot_training - Step 38991: {'lr': 0.00042681046398471693, 'samples': 19963904, 'steps': 38991, 'loss/train': 2.1073215007781982} +03/05/2022 11:17:52 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/05/2022 11:17:57 - INFO - codeparrot_training - Step 38992: {'lr': 0.00042680671222691325, 'samples': 19964416, 'steps': 38992, 'loss/train': 1.733243703842163} +03/05/2022 11:18:00 - INFO - codeparrot_training - Step 38993: {'lr': 0.0004268029603894433, 'samples': 19964928, 'steps': 38993, 'loss/train': 2.208637237548828} +03/05/2022 11:18:00 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/05/2022 11:18:05 - INFO - codeparrot_training - Step 38994: {'lr': 0.00042679920847230865, 'samples': 19965440, 'steps': 38994, 'loss/train': 1.5811805725097656} +03/05/2022 11:18:08 - INFO - codeparrot_training - Step 38995: {'lr': 0.000426795456475511, 'samples': 19965952, 'steps': 38995, 'loss/train': 1.5416536331176758} +03/05/2022 11:18:08 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/05/2022 11:18:14 - INFO - codeparrot_training - Step 38996: {'lr': 0.00042679170439905204, 'samples': 19966464, 'steps': 38996, 'loss/train': 1.6714760065078735} +03/05/2022 11:18:17 - INFO - codeparrot_training - Step 38997: {'lr': 0.0004267879522429334, 'samples': 19966976, 'steps': 38997, 'loss/train': 1.8261314630508423} +03/05/2022 11:18:17 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/05/2022 11:18:22 - INFO - codeparrot_training - Step 38998: {'lr': 0.00042678420000715687, 'samples': 19967488, 'steps': 38998, 'loss/train': 2.248667001724243} +03/05/2022 11:18:25 - INFO - codeparrot_training - Step 38999: {'lr': 0.0004267804476917242, 'samples': 19968000, 'steps': 38999, 'loss/train': 2.644012212753296} +03/05/2022 11:18:25 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 11:18:31 - INFO - codeparrot_training - Step 39000: {'lr': 0.00042677669529663686, 'samples': 19968512, 'steps': 39000, 'loss/train': 1.4423401355743408} +03/05/2022 11:18:34 - INFO - codeparrot_training - Step 39001: {'lr': 0.0004267729428218968, 'samples': 19969024, 'steps': 39001, 'loss/train': 1.3501769304275513} +03/05/2022 11:18:35 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 11:18:39 - INFO - codeparrot_training - Step 39002: {'lr': 0.0004267691902675055, 'samples': 19969536, 'steps': 39002, 'loss/train': 1.4146558046340942} +03/05/2022 11:18:42 - INFO - codeparrot_training - Step 39003: {'lr': 0.0004267654376334647, 'samples': 19970048, 'steps': 39003, 'loss/train': 1.7157591581344604} +03/05/2022 11:18:43 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 11:18:48 - INFO - codeparrot_training - Step 39004: {'lr': 0.00042676168491977617, 'samples': 19970560, 'steps': 39004, 'loss/train': 2.011533498764038} +03/05/2022 11:18:51 - INFO - codeparrot_training - Step 39005: {'lr': 0.00042675793212644156, 'samples': 19971072, 'steps': 39005, 'loss/train': 0.9007067084312439} +03/05/2022 11:18:52 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 11:18:56 - INFO - codeparrot_training - Step 39006: {'lr': 0.00042675417925346255, 'samples': 19971584, 'steps': 39006, 'loss/train': 1.8242809772491455} +03/05/2022 11:18:59 - INFO - codeparrot_training - Step 39007: {'lr': 0.0004267504263008408, 'samples': 19972096, 'steps': 39007, 'loss/train': 1.398826241493225} +03/05/2022 11:19:00 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 11:19:04 - INFO - codeparrot_training - Step 39008: {'lr': 0.0004267466732685781, 'samples': 19972608, 'steps': 39008, 'loss/train': 1.4457794427871704} +03/05/2022 11:19:08 - INFO - codeparrot_training - Step 39009: {'lr': 0.000426742920156676, 'samples': 19973120, 'steps': 39009, 'loss/train': 1.3705793619155884} +03/05/2022 11:19:08 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/05/2022 11:19:13 - INFO - codeparrot_training - Step 39010: {'lr': 0.00042673916696513625, 'samples': 19973632, 'steps': 39010, 'loss/train': 1.8823187351226807} +03/05/2022 11:19:16 - INFO - codeparrot_training - Step 39011: {'lr': 0.0004267354136939607, 'samples': 19974144, 'steps': 39011, 'loss/train': 2.3398311138153076} +03/05/2022 11:19:16 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 11:19:21 - INFO - codeparrot_training - Step 39012: {'lr': 0.0004267316603431508, 'samples': 19974656, 'steps': 39012, 'loss/train': 1.3488638401031494} +03/05/2022 11:19:24 - INFO - codeparrot_training - Step 39013: {'lr': 0.00042672790691270835, 'samples': 19975168, 'steps': 39013, 'loss/train': 1.4324791431427002} +03/05/2022 11:19:24 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/05/2022 11:19:30 - INFO - codeparrot_training - Step 39014: {'lr': 0.00042672415340263507, 'samples': 19975680, 'steps': 39014, 'loss/train': 1.1562247276306152} +03/05/2022 11:19:33 - INFO - codeparrot_training - Step 39015: {'lr': 0.00042672039981293255, 'samples': 19976192, 'steps': 39015, 'loss/train': 2.0197999477386475} +03/05/2022 11:19:34 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/05/2022 11:19:38 - INFO - codeparrot_training - Step 39016: {'lr': 0.0004267166461436025, 'samples': 19976704, 'steps': 39016, 'loss/train': 0.9053488969802856} +03/05/2022 11:19:41 - INFO - codeparrot_training - Step 39017: {'lr': 0.0004267128923946468, 'samples': 19977216, 'steps': 39017, 'loss/train': 1.7771810293197632} +03/05/2022 11:19:42 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 11:19:47 - INFO - codeparrot_training - Step 39018: {'lr': 0.00042670913856606693, 'samples': 19977728, 'steps': 39018, 'loss/train': 1.502582311630249} +03/05/2022 11:19:50 - INFO - codeparrot_training - Step 39019: {'lr': 0.0004267053846578646, 'samples': 19978240, 'steps': 39019, 'loss/train': 1.471237301826477} +03/05/2022 11:19:50 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 11:19:55 - INFO - codeparrot_training - Step 39020: {'lr': 0.00042670163067004156, 'samples': 19978752, 'steps': 39020, 'loss/train': 1.9652624130249023} +03/05/2022 11:19:59 - INFO - codeparrot_training - Step 39021: {'lr': 0.00042669787660259956, 'samples': 19979264, 'steps': 39021, 'loss/train': 0.4530816972255707} +03/05/2022 11:19:59 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/05/2022 11:20:04 - INFO - codeparrot_training - Step 39022: {'lr': 0.0004266941224555402, 'samples': 19979776, 'steps': 39022, 'loss/train': 2.6839938163757324} +03/05/2022 11:20:07 - INFO - codeparrot_training - Step 39023: {'lr': 0.0004266903682288652, 'samples': 19980288, 'steps': 39023, 'loss/train': 1.3970366716384888} +03/05/2022 11:20:08 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/05/2022 11:20:12 - INFO - codeparrot_training - Step 39024: {'lr': 0.00042668661392257626, 'samples': 19980800, 'steps': 39024, 'loss/train': 1.744213342666626} +03/05/2022 11:20:15 - INFO - codeparrot_training - Step 39025: {'lr': 0.00042668285953667497, 'samples': 19981312, 'steps': 39025, 'loss/train': 1.6673506498336792} +03/05/2022 11:20:16 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/05/2022 11:20:21 - INFO - codeparrot_training - Step 39026: {'lr': 0.0004266791050711632, 'samples': 19981824, 'steps': 39026, 'loss/train': 2.0671377182006836} +03/05/2022 11:20:24 - INFO - codeparrot_training - Step 39027: {'lr': 0.0004266753505260425, 'samples': 19982336, 'steps': 39027, 'loss/train': 1.5908591747283936} +03/05/2022 11:20:25 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/05/2022 11:20:29 - INFO - codeparrot_training - Step 39028: {'lr': 0.00042667159590131467, 'samples': 19982848, 'steps': 39028, 'loss/train': 2.464757204055786} +03/05/2022 11:20:32 - INFO - codeparrot_training - Step 39029: {'lr': 0.0004266678411969813, 'samples': 19983360, 'steps': 39029, 'loss/train': 1.2391139268875122} +03/05/2022 11:20:33 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 11:20:38 - INFO - codeparrot_training - Step 39030: {'lr': 0.0004266640864130441, 'samples': 19983872, 'steps': 39030, 'loss/train': 2.689448356628418} +03/05/2022 11:20:41 - INFO - codeparrot_training - Step 39031: {'lr': 0.00042666033154950485, 'samples': 19984384, 'steps': 39031, 'loss/train': 2.36437726020813} +03/05/2022 11:20:41 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/05/2022 11:20:46 - INFO - codeparrot_training - Step 39032: {'lr': 0.00042665657660636517, 'samples': 19984896, 'steps': 39032, 'loss/train': 1.2376725673675537} +03/05/2022 11:20:49 - INFO - codeparrot_training - Step 39033: {'lr': 0.0004266528215836267, 'samples': 19985408, 'steps': 39033, 'loss/train': 1.7314574718475342} +03/05/2022 11:20:50 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/05/2022 11:20:54 - INFO - codeparrot_training - Step 39034: {'lr': 0.0004266490664812913, 'samples': 19985920, 'steps': 39034, 'loss/train': 2.633453607559204} +03/05/2022 11:20:58 - INFO - codeparrot_training - Step 39035: {'lr': 0.00042664531129936044, 'samples': 19986432, 'steps': 39035, 'loss/train': 1.098887324333191} +03/05/2022 11:20:58 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/05/2022 11:21:03 - INFO - codeparrot_training - Step 39036: {'lr': 0.00042664155603783606, 'samples': 19986944, 'steps': 39036, 'loss/train': 1.9525142908096313} +03/05/2022 11:21:06 - INFO - codeparrot_training - Step 39037: {'lr': 0.00042663780069671965, 'samples': 19987456, 'steps': 39037, 'loss/train': 1.3510466814041138} +03/05/2022 11:21:06 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/05/2022 11:21:11 - INFO - codeparrot_training - Step 39038: {'lr': 0.00042663404527601293, 'samples': 19987968, 'steps': 39038, 'loss/train': 1.695973515510559} +03/05/2022 11:21:14 - INFO - codeparrot_training - Step 39039: {'lr': 0.00042663028977571774, 'samples': 19988480, 'steps': 39039, 'loss/train': 1.1089729070663452} +03/05/2022 11:21:15 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/05/2022 11:21:20 - INFO - codeparrot_training - Step 39040: {'lr': 0.0004266265341958355, 'samples': 19988992, 'steps': 39040, 'loss/train': 1.50751793384552} +03/05/2022 11:21:23 - INFO - codeparrot_training - Step 39041: {'lr': 0.0004266227785363682, 'samples': 19989504, 'steps': 39041, 'loss/train': 1.5025349855422974} +03/05/2022 11:21:23 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 11:21:28 - INFO - codeparrot_training - Step 39042: {'lr': 0.0004266190227973174, 'samples': 19990016, 'steps': 39042, 'loss/train': 1.910841941833496} +03/05/2022 11:21:31 - INFO - codeparrot_training - Step 39043: {'lr': 0.00042661526697868475, 'samples': 19990528, 'steps': 39043, 'loss/train': 1.483147144317627} +03/05/2022 11:21:31 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 11:21:36 - INFO - codeparrot_training - Step 39044: {'lr': 0.000426611511080472, 'samples': 19991040, 'steps': 39044, 'loss/train': 0.8413196802139282} +03/05/2022 11:21:39 - INFO - codeparrot_training - Step 39045: {'lr': 0.0004266077551026809, 'samples': 19991552, 'steps': 39045, 'loss/train': 1.4697948694229126} +03/05/2022 11:21:40 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/05/2022 11:21:45 - INFO - codeparrot_training - Step 39046: {'lr': 0.000426603999045313, 'samples': 19992064, 'steps': 39046, 'loss/train': 1.2267876863479614} +03/05/2022 11:21:48 - INFO - codeparrot_training - Step 39047: {'lr': 0.00042660024290837003, 'samples': 19992576, 'steps': 39047, 'loss/train': 1.883570671081543} +03/05/2022 11:21:48 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/05/2022 11:21:53 - INFO - codeparrot_training - Step 39048: {'lr': 0.00042659648669185376, 'samples': 19993088, 'steps': 39048, 'loss/train': 0.9740869998931885} +03/05/2022 11:21:56 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/05/2022 11:21:58 - INFO - codeparrot_training - Step 39049: {'lr': 0.0004265927303957658, 'samples': 19993600, 'steps': 39049, 'loss/train': 1.70529305934906} +03/05/2022 11:22:02 - INFO - codeparrot_training - Step 39050: {'lr': 0.0004265889740201079, 'samples': 19994112, 'steps': 39050, 'loss/train': 1.9013792276382446} +03/05/2022 11:22:05 - INFO - codeparrot_training - Step 39051: {'lr': 0.0004265852175648818, 'samples': 19994624, 'steps': 39051, 'loss/train': 2.120541572570801} +03/05/2022 11:22:05 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/05/2022 11:22:10 - INFO - codeparrot_training - Step 39052: {'lr': 0.00042658146103008904, 'samples': 19995136, 'steps': 39052, 'loss/train': 2.5940370559692383} +03/05/2022 11:22:13 - INFO - codeparrot_training - Step 39053: {'lr': 0.0004265777044157314, 'samples': 19995648, 'steps': 39053, 'loss/train': 2.0096001625061035} +03/05/2022 11:22:14 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 11:22:18 - INFO - codeparrot_training - Step 39054: {'lr': 0.0004265739477218106, 'samples': 19996160, 'steps': 39054, 'loss/train': 1.5099202394485474} +03/05/2022 11:22:22 - INFO - codeparrot_training - Step 39055: {'lr': 0.0004265701909483283, 'samples': 19996672, 'steps': 39055, 'loss/train': 1.1148078441619873} +03/05/2022 11:22:22 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/05/2022 11:22:27 - INFO - codeparrot_training - Step 39056: {'lr': 0.0004265664340952862, 'samples': 19997184, 'steps': 39056, 'loss/train': 1.7687036991119385} +03/05/2022 11:22:30 - INFO - codeparrot_training - Step 39057: {'lr': 0.00042656267716268596, 'samples': 19997696, 'steps': 39057, 'loss/train': 2.134798049926758} +03/05/2022 11:22:30 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/05/2022 11:22:35 - INFO - codeparrot_training - Step 39058: {'lr': 0.00042655892015052945, 'samples': 19998208, 'steps': 39058, 'loss/train': 1.2702082395553589} +03/05/2022 11:22:38 - INFO - codeparrot_training - Step 39059: {'lr': 0.00042655516305881803, 'samples': 19998720, 'steps': 39059, 'loss/train': 2.060908317565918} +03/05/2022 11:22:39 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/05/2022 11:22:44 - INFO - codeparrot_training - Step 39060: {'lr': 0.00042655140588755366, 'samples': 19999232, 'steps': 39060, 'loss/train': 2.0070769786834717} +03/05/2022 11:22:47 - INFO - codeparrot_training - Step 39061: {'lr': 0.0004265476486367379, 'samples': 19999744, 'steps': 39061, 'loss/train': 2.6038737297058105} +03/05/2022 11:22:48 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 11:22:52 - INFO - codeparrot_training - Step 39062: {'lr': 0.00042654389130637255, 'samples': 20000256, 'steps': 39062, 'loss/train': 1.4936790466308594} +03/05/2022 11:22:55 - INFO - codeparrot_training - Step 39063: {'lr': 0.0004265401338964592, 'samples': 20000768, 'steps': 39063, 'loss/train': 1.6675760746002197} +03/05/2022 11:22:56 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/05/2022 11:23:01 - INFO - codeparrot_training - Step 39064: {'lr': 0.0004265363764069997, 'samples': 20001280, 'steps': 39064, 'loss/train': 1.8414729833602905} +03/05/2022 11:23:04 - INFO - codeparrot_training - Step 39065: {'lr': 0.0004265326188379955, 'samples': 20001792, 'steps': 39065, 'loss/train': 1.8442939519882202} +03/05/2022 11:23:05 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 11:23:09 - INFO - codeparrot_training - Step 39066: {'lr': 0.00042652886118944844, 'samples': 20002304, 'steps': 39066, 'loss/train': 2.2264957427978516} +03/05/2022 11:23:12 - INFO - codeparrot_training - Step 39067: {'lr': 0.0004265251034613603, 'samples': 20002816, 'steps': 39067, 'loss/train': 1.5313924551010132} +03/05/2022 11:23:13 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 11:23:18 - INFO - codeparrot_training - Step 39068: {'lr': 0.0004265213456537326, 'samples': 20003328, 'steps': 39068, 'loss/train': 2.2438547611236572} +03/05/2022 11:23:21 - INFO - codeparrot_training - Step 39069: {'lr': 0.0004265175877665671, 'samples': 20003840, 'steps': 39069, 'loss/train': 0.401152104139328} +03/05/2022 11:23:21 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/05/2022 11:23:26 - INFO - codeparrot_training - Step 39070: {'lr': 0.0004265138297998655, 'samples': 20004352, 'steps': 39070, 'loss/train': 2.0953726768493652} +03/05/2022 11:23:29 - INFO - codeparrot_training - Step 39071: {'lr': 0.0004265100717536295, 'samples': 20004864, 'steps': 39071, 'loss/train': 1.6544808149337769} +03/05/2022 11:23:30 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 11:23:35 - INFO - codeparrot_training - Step 39072: {'lr': 0.0004265063136278608, 'samples': 20005376, 'steps': 39072, 'loss/train': 0.7623831629753113} +03/05/2022 11:23:38 - INFO - codeparrot_training - Step 39073: {'lr': 0.00042650255542256107, 'samples': 20005888, 'steps': 39073, 'loss/train': 1.9172495603561401} +03/05/2022 11:23:38 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 11:23:43 - INFO - codeparrot_training - Step 39074: {'lr': 0.000426498797137732, 'samples': 20006400, 'steps': 39074, 'loss/train': 1.7618412971496582} +03/05/2022 11:23:46 - INFO - codeparrot_training - Step 39075: {'lr': 0.00042649503877337523, 'samples': 20006912, 'steps': 39075, 'loss/train': 1.6913955211639404} +03/05/2022 11:23:47 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/05/2022 11:23:51 - INFO - codeparrot_training - Step 39076: {'lr': 0.0004264912803294926, 'samples': 20007424, 'steps': 39076, 'loss/train': 1.1596757173538208} +03/05/2022 11:23:54 - INFO - codeparrot_training - Step 39077: {'lr': 0.0004264875218060857, 'samples': 20007936, 'steps': 39077, 'loss/train': 1.4177894592285156} +03/05/2022 11:23:55 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/05/2022 11:24:00 - INFO - codeparrot_training - Step 39078: {'lr': 0.00042648376320315634, 'samples': 20008448, 'steps': 39078, 'loss/train': 2.1967215538024902} +03/05/2022 11:24:03 - INFO - codeparrot_training - Step 39079: {'lr': 0.000426480004520706, 'samples': 20008960, 'steps': 39079, 'loss/train': 1.406382441520691} +03/05/2022 11:24:03 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 11:24:08 - INFO - codeparrot_training - Step 39080: {'lr': 0.00042647624575873656, 'samples': 20009472, 'steps': 39080, 'loss/train': 1.7468113899230957} +03/05/2022 11:24:12 - INFO - codeparrot_training - Step 39081: {'lr': 0.0004264724869172496, 'samples': 20009984, 'steps': 39081, 'loss/train': 1.6698496341705322} +03/05/2022 11:24:12 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/05/2022 11:24:17 - INFO - codeparrot_training - Step 39082: {'lr': 0.00042646872799624694, 'samples': 20010496, 'steps': 39082, 'loss/train': 2.103158473968506} +03/05/2022 11:24:20 - INFO - codeparrot_training - Step 39083: {'lr': 0.00042646496899573005, 'samples': 20011008, 'steps': 39083, 'loss/train': 1.3880624771118164} +03/05/2022 11:24:20 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/05/2022 11:24:25 - INFO - codeparrot_training - Step 39084: {'lr': 0.0004264612099157009, 'samples': 20011520, 'steps': 39084, 'loss/train': 1.5436391830444336} +03/05/2022 11:24:28 - INFO - codeparrot_training - Step 39085: {'lr': 0.00042645745075616106, 'samples': 20012032, 'steps': 39085, 'loss/train': 1.1677860021591187} +03/05/2022 11:24:29 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 11:24:34 - INFO - codeparrot_training - Step 39086: {'lr': 0.0004264536915171121, 'samples': 20012544, 'steps': 39086, 'loss/train': 0.7436908483505249} +03/05/2022 11:24:37 - INFO - codeparrot_training - Step 39087: {'lr': 0.0004264499321985559, 'samples': 20013056, 'steps': 39087, 'loss/train': 1.8198416233062744} +03/05/2022 11:24:37 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 11:24:42 - INFO - codeparrot_training - Step 39088: {'lr': 0.0004264461728004941, 'samples': 20013568, 'steps': 39088, 'loss/train': 1.9524335861206055} +03/05/2022 11:24:45 - INFO - codeparrot_training - Step 39089: {'lr': 0.0004264424133229283, 'samples': 20014080, 'steps': 39089, 'loss/train': 2.121626615524292} +03/05/2022 11:24:45 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 11:24:51 - INFO - codeparrot_training - Step 39090: {'lr': 0.0004264386537658603, 'samples': 20014592, 'steps': 39090, 'loss/train': 1.7298251390457153} +03/05/2022 11:24:54 - INFO - codeparrot_training - Step 39091: {'lr': 0.0004264348941292919, 'samples': 20015104, 'steps': 39091, 'loss/train': 1.5489697456359863} +03/05/2022 11:24:54 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/05/2022 11:24:59 - INFO - codeparrot_training - Step 39092: {'lr': 0.0004264311344132245, 'samples': 20015616, 'steps': 39092, 'loss/train': 1.9535722732543945} +03/05/2022 11:25:02 - INFO - codeparrot_training - Step 39093: {'lr': 0.00042642737461766003, 'samples': 20016128, 'steps': 39093, 'loss/train': 0.5044448971748352} +03/05/2022 11:25:03 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/05/2022 11:25:08 - INFO - codeparrot_training - Step 39094: {'lr': 0.0004264236147426, 'samples': 20016640, 'steps': 39094, 'loss/train': 1.9438934326171875} +03/05/2022 11:25:11 - INFO - codeparrot_training - Step 39095: {'lr': 0.0004264198547880464, 'samples': 20017152, 'steps': 39095, 'loss/train': 1.9148969650268555} +03/05/2022 11:25:11 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/05/2022 11:25:16 - INFO - codeparrot_training - Step 39096: {'lr': 0.00042641609475400054, 'samples': 20017664, 'steps': 39096, 'loss/train': 0.7560935020446777} +03/05/2022 11:25:19 - INFO - codeparrot_training - Step 39097: {'lr': 0.0004264123346404644, 'samples': 20018176, 'steps': 39097, 'loss/train': 1.0892333984375} +03/05/2022 11:25:19 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/05/2022 11:25:25 - INFO - codeparrot_training - Step 39098: {'lr': 0.0004264085744474396, 'samples': 20018688, 'steps': 39098, 'loss/train': 1.7393109798431396} +03/05/2022 11:25:28 - INFO - codeparrot_training - Step 39099: {'lr': 0.0004264048141749278, 'samples': 20019200, 'steps': 39099, 'loss/train': 1.998140573501587} +03/05/2022 11:25:28 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/05/2022 11:25:33 - INFO - codeparrot_training - Step 39100: {'lr': 0.00042640105382293073, 'samples': 20019712, 'steps': 39100, 'loss/train': 1.3697302341461182} +03/05/2022 11:25:36 - INFO - codeparrot_training - Step 39101: {'lr': 0.00042639729339145004, 'samples': 20020224, 'steps': 39101, 'loss/train': 1.869065761566162} +03/05/2022 11:25:36 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/05/2022 11:25:42 - INFO - codeparrot_training - Step 39102: {'lr': 0.0004263935328804874, 'samples': 20020736, 'steps': 39102, 'loss/train': 1.7753989696502686} +03/05/2022 11:25:44 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 11:25:47 - INFO - codeparrot_training - Step 39103: {'lr': 0.0004263897722900447, 'samples': 20021248, 'steps': 39103, 'loss/train': 0.828503429889679} +03/05/2022 11:25:50 - INFO - codeparrot_training - Step 39104: {'lr': 0.0004263860116201234, 'samples': 20021760, 'steps': 39104, 'loss/train': 1.2966006994247437} +03/05/2022 11:25:54 - INFO - codeparrot_training - Step 39105: {'lr': 0.00042638225087072523, 'samples': 20022272, 'steps': 39105, 'loss/train': 1.6922987699508667} +03/05/2022 11:25:54 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 11:25:59 - INFO - codeparrot_training - Step 39106: {'lr': 0.00042637849004185203, 'samples': 20022784, 'steps': 39106, 'loss/train': 1.375722050666809} +03/05/2022 11:26:02 - INFO - codeparrot_training - Step 39107: {'lr': 0.0004263747291335054, 'samples': 20023296, 'steps': 39107, 'loss/train': 1.4237117767333984} +03/05/2022 11:26:02 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 11:26:07 - INFO - codeparrot_training - Step 39108: {'lr': 0.00042637096814568696, 'samples': 20023808, 'steps': 39108, 'loss/train': 1.5423437356948853} +03/05/2022 11:26:10 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 11:26:13 - INFO - codeparrot_training - Step 39109: {'lr': 0.0004263672070783986, 'samples': 20024320, 'steps': 39109, 'loss/train': 0.9405554533004761} +03/05/2022 11:26:16 - INFO - codeparrot_training - Step 39110: {'lr': 0.0004263634459316418, 'samples': 20024832, 'steps': 39110, 'loss/train': 1.620505690574646} +03/05/2022 11:26:19 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/05/2022 11:26:21 - INFO - codeparrot_training - Step 39111: {'lr': 0.0004263596847054184, 'samples': 20025344, 'steps': 39111, 'loss/train': 2.268878221511841} +03/05/2022 11:26:24 - INFO - codeparrot_training - Step 39112: {'lr': 0.00042635592339973006, 'samples': 20025856, 'steps': 39112, 'loss/train': 2.8147025108337402} +03/05/2022 11:26:27 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 11:26:29 - INFO - codeparrot_training - Step 39113: {'lr': 0.00042635216201457836, 'samples': 20026368, 'steps': 39113, 'loss/train': 2.2718677520751953} +03/05/2022 11:26:33 - INFO - codeparrot_training - Step 39114: {'lr': 0.00042634840054996527, 'samples': 20026880, 'steps': 39114, 'loss/train': 1.9831773042678833} +03/05/2022 11:26:35 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 11:26:38 - INFO - codeparrot_training - Step 39115: {'lr': 0.00042634463900589214, 'samples': 20027392, 'steps': 39115, 'loss/train': 1.8290053606033325} +03/05/2022 11:26:41 - INFO - codeparrot_training - Step 39116: {'lr': 0.0004263408773823609, 'samples': 20027904, 'steps': 39116, 'loss/train': 1.9194083213806152} +03/05/2022 11:26:44 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 11:26:47 - INFO - codeparrot_training - Step 39117: {'lr': 0.00042633711567937325, 'samples': 20028416, 'steps': 39117, 'loss/train': 0.9745329022407532} +03/05/2022 11:26:50 - INFO - codeparrot_training - Step 39118: {'lr': 0.00042633335389693073, 'samples': 20028928, 'steps': 39118, 'loss/train': 2.0196757316589355} +03/05/2022 11:26:53 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/05/2022 11:26:55 - INFO - codeparrot_training - Step 39119: {'lr': 0.0004263295920350352, 'samples': 20029440, 'steps': 39119, 'loss/train': 1.5921719074249268} +03/05/2022 11:26:58 - INFO - codeparrot_training - Step 39120: {'lr': 0.0004263258300936882, 'samples': 20029952, 'steps': 39120, 'loss/train': 0.736976683139801} +03/05/2022 11:27:01 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/05/2022 11:27:03 - INFO - codeparrot_training - Step 39121: {'lr': 0.00042632206807289154, 'samples': 20030464, 'steps': 39121, 'loss/train': 1.0121369361877441} +03/05/2022 11:27:07 - INFO - codeparrot_training - Step 39122: {'lr': 0.00042631830597264687, 'samples': 20030976, 'steps': 39122, 'loss/train': 2.478888511657715} +03/05/2022 11:27:09 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 11:27:12 - INFO - codeparrot_training - Step 39123: {'lr': 0.0004263145437929559, 'samples': 20031488, 'steps': 39123, 'loss/train': 1.665016531944275} +03/05/2022 11:27:15 - INFO - codeparrot_training - Step 39124: {'lr': 0.0004263107815338203, 'samples': 20032000, 'steps': 39124, 'loss/train': 2.112304449081421} +03/05/2022 11:27:19 - INFO - codeparrot_training - Step 39125: {'lr': 0.00042630701919524176, 'samples': 20032512, 'steps': 39125, 'loss/train': 1.4886009693145752} +03/05/2022 11:27:20 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/05/2022 11:27:24 - INFO - codeparrot_training - Step 39126: {'lr': 0.00042630325677722204, 'samples': 20033024, 'steps': 39126, 'loss/train': 1.041352391242981} +03/05/2022 11:27:27 - INFO - codeparrot_training - Step 39127: {'lr': 0.0004262994942797628, 'samples': 20033536, 'steps': 39127, 'loss/train': 2.1906192302703857} +03/05/2022 11:27:28 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/05/2022 11:27:32 - INFO - codeparrot_training - Step 39128: {'lr': 0.0004262957317028657, 'samples': 20034048, 'steps': 39128, 'loss/train': 1.2582366466522217} +03/05/2022 11:27:36 - INFO - codeparrot_training - Step 39129: {'lr': 0.00042629196904653245, 'samples': 20034560, 'steps': 39129, 'loss/train': 1.5137524604797363} +03/05/2022 11:27:37 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 11:27:41 - INFO - codeparrot_training - Step 39130: {'lr': 0.00042628820631076484, 'samples': 20035072, 'steps': 39130, 'loss/train': 1.637446403503418} +03/05/2022 11:27:44 - INFO - codeparrot_training - Step 39131: {'lr': 0.0004262844434955644, 'samples': 20035584, 'steps': 39131, 'loss/train': 1.9151180982589722} +03/05/2022 11:27:45 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 11:27:49 - INFO - codeparrot_training - Step 39132: {'lr': 0.00042628068060093294, 'samples': 20036096, 'steps': 39132, 'loss/train': 1.630250096321106} +03/05/2022 11:27:52 - INFO - codeparrot_training - Step 39133: {'lr': 0.0004262769176268722, 'samples': 20036608, 'steps': 39133, 'loss/train': 1.9063870906829834} +03/05/2022 11:27:53 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 11:27:58 - INFO - codeparrot_training - Step 39134: {'lr': 0.0004262731545733837, 'samples': 20037120, 'steps': 39134, 'loss/train': 1.768835186958313} +03/05/2022 11:28:01 - INFO - codeparrot_training - Step 39135: {'lr': 0.0004262693914404692, 'samples': 20037632, 'steps': 39135, 'loss/train': 1.9593032598495483} +03/05/2022 11:28:02 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 11:28:06 - INFO - codeparrot_training - Step 39136: {'lr': 0.0004262656282281305, 'samples': 20038144, 'steps': 39136, 'loss/train': 1.2500556707382202} +03/05/2022 11:28:09 - INFO - codeparrot_training - Step 39137: {'lr': 0.0004262618649363692, 'samples': 20038656, 'steps': 39137, 'loss/train': 1.6795276403427124} +03/05/2022 11:28:10 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/05/2022 11:28:14 - INFO - codeparrot_training - Step 39138: {'lr': 0.0004262581015651871, 'samples': 20039168, 'steps': 39138, 'loss/train': 1.1863007545471191} +03/05/2022 11:28:18 - INFO - codeparrot_training - Step 39139: {'lr': 0.0004262543381145857, 'samples': 20039680, 'steps': 39139, 'loss/train': 2.330703020095825} +03/05/2022 11:28:18 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 11:28:23 - INFO - codeparrot_training - Step 39140: {'lr': 0.0004262505745845669, 'samples': 20040192, 'steps': 39140, 'loss/train': 1.2345930337905884} +03/05/2022 11:28:26 - INFO - codeparrot_training - Step 39141: {'lr': 0.0004262468109751323, 'samples': 20040704, 'steps': 39141, 'loss/train': 1.7980554103851318} +03/05/2022 11:28:27 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 11:28:31 - INFO - codeparrot_training - Step 39142: {'lr': 0.0004262430472862836, 'samples': 20041216, 'steps': 39142, 'loss/train': 1.4580658674240112} +03/05/2022 11:28:34 - INFO - codeparrot_training - Step 39143: {'lr': 0.00042623928351802245, 'samples': 20041728, 'steps': 39143, 'loss/train': 1.6295183897018433} +03/05/2022 11:28:35 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/05/2022 11:28:40 - INFO - codeparrot_training - Step 39144: {'lr': 0.00042623551967035066, 'samples': 20042240, 'steps': 39144, 'loss/train': 2.397862434387207} +03/05/2022 11:28:43 - INFO - codeparrot_training - Step 39145: {'lr': 0.0004262317557432699, 'samples': 20042752, 'steps': 39145, 'loss/train': 1.4817488193511963} +03/05/2022 11:28:43 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/05/2022 11:28:48 - INFO - codeparrot_training - Step 39146: {'lr': 0.0004262279917367817, 'samples': 20043264, 'steps': 39146, 'loss/train': 1.7306450605392456} +03/05/2022 11:28:51 - INFO - codeparrot_training - Step 39147: {'lr': 0.00042622422765088805, 'samples': 20043776, 'steps': 39147, 'loss/train': 1.9909801483154297} +03/05/2022 11:28:51 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 11:28:57 - INFO - codeparrot_training - Step 39148: {'lr': 0.00042622046348559034, 'samples': 20044288, 'steps': 39148, 'loss/train': 1.649040699005127} +03/05/2022 11:29:00 - INFO - codeparrot_training - Step 39149: {'lr': 0.00042621669924089044, 'samples': 20044800, 'steps': 39149, 'loss/train': 0.8156708478927612} +03/05/2022 11:29:00 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 11:29:05 - INFO - codeparrot_training - Step 39150: {'lr': 0.00042621293491679007, 'samples': 20045312, 'steps': 39150, 'loss/train': 1.2928657531738281} +03/05/2022 11:29:08 - INFO - codeparrot_training - Step 39151: {'lr': 0.00042620917051329086, 'samples': 20045824, 'steps': 39151, 'loss/train': 2.0366790294647217} +03/05/2022 11:29:09 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 11:29:14 - INFO - codeparrot_training - Step 39152: {'lr': 0.0004262054060303945, 'samples': 20046336, 'steps': 39152, 'loss/train': 2.18139386177063} +03/05/2022 11:29:17 - INFO - codeparrot_training - Step 39153: {'lr': 0.00042620164146810267, 'samples': 20046848, 'steps': 39153, 'loss/train': 3.1144769191741943} +03/05/2022 11:29:18 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 11:29:22 - INFO - codeparrot_training - Step 39154: {'lr': 0.0004261978768264172, 'samples': 20047360, 'steps': 39154, 'loss/train': 1.9984711408615112} +03/05/2022 11:29:25 - INFO - codeparrot_training - Step 39155: {'lr': 0.00042619411210533957, 'samples': 20047872, 'steps': 39155, 'loss/train': 2.014003038406372} +03/05/2022 11:29:26 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 11:29:30 - INFO - codeparrot_training - Step 39156: {'lr': 0.00042619034730487167, 'samples': 20048384, 'steps': 39156, 'loss/train': 1.2428696155548096} +03/05/2022 11:29:34 - INFO - codeparrot_training - Step 39157: {'lr': 0.00042618658242501507, 'samples': 20048896, 'steps': 39157, 'loss/train': 1.401235580444336} +03/05/2022 11:29:34 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 11:29:39 - INFO - codeparrot_training - Step 39158: {'lr': 0.0004261828174657716, 'samples': 20049408, 'steps': 39158, 'loss/train': 1.4573992490768433} +03/05/2022 11:29:42 - INFO - codeparrot_training - Step 39159: {'lr': 0.0004261790524271427, 'samples': 20049920, 'steps': 39159, 'loss/train': 1.9141509532928467} +03/05/2022 11:29:43 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/05/2022 11:29:47 - INFO - codeparrot_training - Step 39160: {'lr': 0.00042617528730913036, 'samples': 20050432, 'steps': 39160, 'loss/train': 1.8278671503067017} +03/05/2022 11:29:51 - INFO - codeparrot_training - Step 39161: {'lr': 0.00042617152211173615, 'samples': 20050944, 'steps': 39161, 'loss/train': 2.496934175491333} +03/05/2022 11:29:53 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/05/2022 11:29:56 - INFO - codeparrot_training - Step 39162: {'lr': 0.0004261677568349618, 'samples': 20051456, 'steps': 39162, 'loss/train': 1.7759982347488403} +03/05/2022 11:29:59 - INFO - codeparrot_training - Step 39163: {'lr': 0.0004261639914788089, 'samples': 20051968, 'steps': 39163, 'loss/train': 2.133173704147339} +03/05/2022 11:30:01 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/05/2022 11:30:04 - INFO - codeparrot_training - Step 39164: {'lr': 0.0004261602260432792, 'samples': 20052480, 'steps': 39164, 'loss/train': 1.7375026941299438} +03/05/2022 11:30:07 - INFO - codeparrot_training - Step 39165: {'lr': 0.0004261564605283745, 'samples': 20052992, 'steps': 39165, 'loss/train': 1.407943844795227} +03/05/2022 11:30:09 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/05/2022 11:30:13 - INFO - codeparrot_training - Step 39166: {'lr': 0.0004261526949340965, 'samples': 20053504, 'steps': 39166, 'loss/train': 1.604474425315857} +03/05/2022 11:30:16 - INFO - codeparrot_training - Step 39167: {'lr': 0.0004261489292604467, 'samples': 20054016, 'steps': 39167, 'loss/train': 1.9670841693878174} +03/05/2022 11:30:21 - INFO - codeparrot_training - Step 39168: {'lr': 0.0004261451635074269, 'samples': 20054528, 'steps': 39168, 'loss/train': 1.29154372215271} +03/05/2022 11:30:24 - INFO - codeparrot_training - Step 39169: {'lr': 0.0004261413976750388, 'samples': 20055040, 'steps': 39169, 'loss/train': 1.3283538818359375} +03/05/2022 11:30:26 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 11:30:29 - INFO - codeparrot_training - Step 39170: {'lr': 0.00042613763176328415, 'samples': 20055552, 'steps': 39170, 'loss/train': 1.0437188148498535} +03/05/2022 11:30:33 - INFO - codeparrot_training - Step 39171: {'lr': 0.00042613386577216455, 'samples': 20056064, 'steps': 39171, 'loss/train': 1.861180067062378} +03/05/2022 11:30:34 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 11:30:38 - INFO - codeparrot_training - Step 39172: {'lr': 0.0004261300997016818, 'samples': 20056576, 'steps': 39172, 'loss/train': 1.7584348917007446} +03/05/2022 11:30:41 - INFO - codeparrot_training - Step 39173: {'lr': 0.0004261263335518375, 'samples': 20057088, 'steps': 39173, 'loss/train': 2.1167593002319336} +03/05/2022 11:30:43 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/05/2022 11:30:46 - INFO - codeparrot_training - Step 39174: {'lr': 0.00042612256732263345, 'samples': 20057600, 'steps': 39174, 'loss/train': 2.011868715286255} +03/05/2022 11:30:50 - INFO - codeparrot_training - Step 39175: {'lr': 0.0004261188010140712, 'samples': 20058112, 'steps': 39175, 'loss/train': 2.1888649463653564} +03/05/2022 11:30:51 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/05/2022 11:30:55 - INFO - codeparrot_training - Step 39176: {'lr': 0.00042611503462615266, 'samples': 20058624, 'steps': 39176, 'loss/train': 2.2671525478363037} +03/05/2022 11:30:58 - INFO - codeparrot_training - Step 39177: {'lr': 0.0004261112681588793, 'samples': 20059136, 'steps': 39177, 'loss/train': 1.8337476253509521} +03/05/2022 11:30:59 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 11:31:03 - INFO - codeparrot_training - Step 39178: {'lr': 0.000426107501612253, 'samples': 20059648, 'steps': 39178, 'loss/train': 1.7727893590927124} +03/05/2022 11:31:06 - INFO - codeparrot_training - Step 39179: {'lr': 0.0004261037349862753, 'samples': 20060160, 'steps': 39179, 'loss/train': 0.7417510747909546} +03/05/2022 11:31:07 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 11:31:12 - INFO - codeparrot_training - Step 39180: {'lr': 0.000426099968280948, 'samples': 20060672, 'steps': 39180, 'loss/train': 1.3271936178207397} +03/05/2022 11:31:15 - INFO - codeparrot_training - Step 39181: {'lr': 0.00042609620149627284, 'samples': 20061184, 'steps': 39181, 'loss/train': 1.4863970279693604} +03/05/2022 11:31:16 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/05/2022 11:31:20 - INFO - codeparrot_training - Step 39182: {'lr': 0.00042609243463225134, 'samples': 20061696, 'steps': 39182, 'loss/train': 1.324421763420105} +03/05/2022 11:31:23 - INFO - codeparrot_training - Step 39183: {'lr': 0.00042608866768888533, 'samples': 20062208, 'steps': 39183, 'loss/train': 2.060624122619629} +03/05/2022 11:31:24 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 11:31:29 - INFO - codeparrot_training - Step 39184: {'lr': 0.0004260849006661765, 'samples': 20062720, 'steps': 39184, 'loss/train': 0.9789690375328064} +03/05/2022 11:31:32 - INFO - codeparrot_training - Step 39185: {'lr': 0.0004260811335641266, 'samples': 20063232, 'steps': 39185, 'loss/train': 0.10304628312587738} +03/05/2022 11:31:33 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 11:31:37 - INFO - codeparrot_training - Step 39186: {'lr': 0.0004260773663827372, 'samples': 20063744, 'steps': 39186, 'loss/train': 0.888137698173523} +03/05/2022 11:31:40 - INFO - codeparrot_training - Step 39187: {'lr': 0.00042607359912201004, 'samples': 20064256, 'steps': 39187, 'loss/train': 2.519382953643799} +03/05/2022 11:31:41 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/05/2022 11:31:46 - INFO - codeparrot_training - Step 39188: {'lr': 0.0004260698317819468, 'samples': 20064768, 'steps': 39188, 'loss/train': 1.6876165866851807} +03/05/2022 11:31:49 - INFO - codeparrot_training - Step 39189: {'lr': 0.00042606606436254926, 'samples': 20065280, 'steps': 39189, 'loss/train': 2.177304267883301} +03/05/2022 11:31:50 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 11:31:54 - INFO - codeparrot_training - Step 39190: {'lr': 0.000426062296863819, 'samples': 20065792, 'steps': 39190, 'loss/train': 1.6838548183441162} +03/05/2022 11:31:57 - INFO - codeparrot_training - Step 39191: {'lr': 0.00042605852928575796, 'samples': 20066304, 'steps': 39191, 'loss/train': 1.455576777458191} +03/05/2022 11:31:58 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/05/2022 11:32:02 - INFO - codeparrot_training - Step 39192: {'lr': 0.00042605476162836756, 'samples': 20066816, 'steps': 39192, 'loss/train': 1.828658103942871} +03/05/2022 11:32:06 - INFO - codeparrot_training - Step 39193: {'lr': 0.00042605099389164957, 'samples': 20067328, 'steps': 39193, 'loss/train': 1.7807128429412842} +03/05/2022 11:32:06 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/05/2022 11:32:11 - INFO - codeparrot_training - Step 39194: {'lr': 0.00042604722607560575, 'samples': 20067840, 'steps': 39194, 'loss/train': 1.7363500595092773} +03/05/2022 11:32:14 - INFO - codeparrot_training - Step 39195: {'lr': 0.0004260434581802377, 'samples': 20068352, 'steps': 39195, 'loss/train': 0.9399168491363525} +03/05/2022 11:32:14 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 11:32:19 - INFO - codeparrot_training - Step 39196: {'lr': 0.0004260396902055473, 'samples': 20068864, 'steps': 39196, 'loss/train': 1.4609107971191406} +03/05/2022 11:32:22 - INFO - codeparrot_training - Step 39197: {'lr': 0.0004260359221515361, 'samples': 20069376, 'steps': 39197, 'loss/train': 1.13994300365448} +03/05/2022 11:32:23 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/05/2022 11:32:28 - INFO - codeparrot_training - Step 39198: {'lr': 0.0004260321540182057, 'samples': 20069888, 'steps': 39198, 'loss/train': 1.7611937522888184} +03/05/2022 11:32:31 - INFO - codeparrot_training - Step 39199: {'lr': 0.00042602838580555814, 'samples': 20070400, 'steps': 39199, 'loss/train': 1.859106183052063} +03/05/2022 11:32:32 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 11:32:36 - INFO - codeparrot_training - Step 39200: {'lr': 0.0004260246175135948, 'samples': 20070912, 'steps': 39200, 'loss/train': 1.0504287481307983} +03/05/2022 11:32:39 - INFO - codeparrot_training - Step 39201: {'lr': 0.00042602084914231743, 'samples': 20071424, 'steps': 39201, 'loss/train': 1.8610565662384033} +03/05/2022 11:32:40 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/05/2022 11:32:45 - INFO - codeparrot_training - Step 39202: {'lr': 0.0004260170806917278, 'samples': 20071936, 'steps': 39202, 'loss/train': 0.9496520757675171} +03/05/2022 11:32:48 - INFO - codeparrot_training - Step 39203: {'lr': 0.0004260133121618276, 'samples': 20072448, 'steps': 39203, 'loss/train': 1.821781873703003} +03/05/2022 11:32:48 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/05/2022 11:32:53 - INFO - codeparrot_training - Step 39204: {'lr': 0.0004260095435526186, 'samples': 20072960, 'steps': 39204, 'loss/train': 1.6188546419143677} +03/05/2022 11:32:56 - INFO - codeparrot_training - Step 39205: {'lr': 0.0004260057748641024, 'samples': 20073472, 'steps': 39205, 'loss/train': 1.6171358823776245} +03/05/2022 11:32:57 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 11:33:02 - INFO - codeparrot_training - Step 39206: {'lr': 0.00042600200609628063, 'samples': 20073984, 'steps': 39206, 'loss/train': 1.3141547441482544} +03/05/2022 11:33:05 - INFO - codeparrot_training - Step 39207: {'lr': 0.0004259982372491551, 'samples': 20074496, 'steps': 39207, 'loss/train': 1.2619725465774536} +03/05/2022 11:33:05 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/05/2022 11:33:10 - INFO - codeparrot_training - Step 39208: {'lr': 0.00042599446832272746, 'samples': 20075008, 'steps': 39208, 'loss/train': 1.1962252855300903} +03/05/2022 11:33:13 - INFO - codeparrot_training - Step 39209: {'lr': 0.0004259906993169995, 'samples': 20075520, 'steps': 39209, 'loss/train': 1.0370994806289673} +03/05/2022 11:33:13 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/05/2022 11:33:18 - INFO - codeparrot_training - Step 39210: {'lr': 0.00042598693023197283, 'samples': 20076032, 'steps': 39210, 'loss/train': 1.5981732606887817} +03/05/2022 11:33:21 - INFO - codeparrot_training - Step 39211: {'lr': 0.00042598316106764913, 'samples': 20076544, 'steps': 39211, 'loss/train': 0.6616408824920654} +03/05/2022 11:33:22 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/05/2022 11:33:27 - INFO - codeparrot_training - Step 39212: {'lr': 0.0004259793918240302, 'samples': 20077056, 'steps': 39212, 'loss/train': 1.0613933801651} +03/05/2022 11:33:30 - INFO - codeparrot_training - Step 39213: {'lr': 0.00042597562250111753, 'samples': 20077568, 'steps': 39213, 'loss/train': 1.500213623046875} +03/05/2022 11:33:30 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 11:33:35 - INFO - codeparrot_training - Step 39214: {'lr': 0.00042597185309891305, 'samples': 20078080, 'steps': 39214, 'loss/train': 1.5688179731369019} +03/05/2022 11:33:38 - INFO - codeparrot_training - Step 39215: {'lr': 0.0004259680836174184, 'samples': 20078592, 'steps': 39215, 'loss/train': 1.468929409980774} +03/05/2022 11:33:39 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/05/2022 11:33:44 - INFO - codeparrot_training - Step 39216: {'lr': 0.0004259643140566352, 'samples': 20079104, 'steps': 39216, 'loss/train': 1.2566858530044556} +03/05/2022 11:33:47 - INFO - codeparrot_training - Step 39217: {'lr': 0.0004259605444165652, 'samples': 20079616, 'steps': 39217, 'loss/train': 1.6865615844726562} +03/05/2022 11:33:47 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/05/2022 11:33:52 - INFO - codeparrot_training - Step 39218: {'lr': 0.0004259567746972101, 'samples': 20080128, 'steps': 39218, 'loss/train': 1.6043339967727661} +03/05/2022 11:33:55 - INFO - codeparrot_training - Step 39219: {'lr': 0.00042595300489857164, 'samples': 20080640, 'steps': 39219, 'loss/train': 1.5585455894470215} +03/05/2022 11:33:56 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 11:34:01 - INFO - codeparrot_training - Step 39220: {'lr': 0.0004259492350206514, 'samples': 20081152, 'steps': 39220, 'loss/train': 1.396196722984314} +03/05/2022 11:34:04 - INFO - codeparrot_training - Step 39221: {'lr': 0.00042594546506345124, 'samples': 20081664, 'steps': 39221, 'loss/train': 1.6927073001861572} +03/05/2022 11:34:04 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/05/2022 11:34:09 - INFO - codeparrot_training - Step 39222: {'lr': 0.00042594169502697265, 'samples': 20082176, 'steps': 39222, 'loss/train': 1.2485873699188232} +03/05/2022 11:34:12 - INFO - codeparrot_training - Step 39223: {'lr': 0.00042593792491121753, 'samples': 20082688, 'steps': 39223, 'loss/train': 1.6447014808654785} +03/05/2022 11:34:12 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/05/2022 11:34:17 - INFO - codeparrot_training - Step 39224: {'lr': 0.00042593415471618744, 'samples': 20083200, 'steps': 39224, 'loss/train': 2.443131923675537} +03/05/2022 11:34:21 - INFO - codeparrot_training - Step 39225: {'lr': 0.0004259303844418841, 'samples': 20083712, 'steps': 39225, 'loss/train': 1.5844464302062988} +03/05/2022 11:34:21 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 11:34:26 - INFO - codeparrot_training - Step 39226: {'lr': 0.00042592661408830937, 'samples': 20084224, 'steps': 39226, 'loss/train': 1.7553945779800415} +03/05/2022 11:34:29 - INFO - codeparrot_training - Step 39227: {'lr': 0.00042592284365546474, 'samples': 20084736, 'steps': 39227, 'loss/train': 1.4026416540145874} +03/05/2022 11:34:29 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 11:34:34 - INFO - codeparrot_training - Step 39228: {'lr': 0.00042591907314335197, 'samples': 20085248, 'steps': 39228, 'loss/train': 1.144972324371338} +03/05/2022 11:34:38 - INFO - codeparrot_training - Step 39229: {'lr': 0.00042591530255197286, 'samples': 20085760, 'steps': 39229, 'loss/train': 2.1506900787353516} +03/05/2022 11:34:38 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 11:34:43 - INFO - codeparrot_training - Step 39230: {'lr': 0.00042591153188132903, 'samples': 20086272, 'steps': 39230, 'loss/train': 1.4605000019073486} +03/05/2022 11:34:46 - INFO - codeparrot_training - Step 39231: {'lr': 0.00042590776113142216, 'samples': 20086784, 'steps': 39231, 'loss/train': 1.7237632274627686} +03/05/2022 11:34:46 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/05/2022 11:34:51 - INFO - codeparrot_training - Step 39232: {'lr': 0.00042590399030225393, 'samples': 20087296, 'steps': 39232, 'loss/train': 1.3452601432800293} +03/05/2022 11:34:55 - INFO - codeparrot_training - Step 39233: {'lr': 0.0004259002193938261, 'samples': 20087808, 'steps': 39233, 'loss/train': 0.7326532602310181} +03/05/2022 11:34:55 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/05/2022 11:35:00 - INFO - codeparrot_training - Step 39234: {'lr': 0.0004258964484061403, 'samples': 20088320, 'steps': 39234, 'loss/train': 2.176058292388916} +03/05/2022 11:35:04 - INFO - codeparrot_training - Step 39235: {'lr': 0.00042589267733919833, 'samples': 20088832, 'steps': 39235, 'loss/train': 2.362720012664795} +03/05/2022 11:35:06 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/05/2022 11:35:09 - INFO - codeparrot_training - Step 39236: {'lr': 0.0004258889061930018, 'samples': 20089344, 'steps': 39236, 'loss/train': 1.4387102127075195} +03/05/2022 11:35:12 - INFO - codeparrot_training - Step 39237: {'lr': 0.0004258851349675524, 'samples': 20089856, 'steps': 39237, 'loss/train': 1.5271624326705933} +03/05/2022 11:35:15 - INFO - codeparrot_training - Step 39238: {'lr': 0.00042588136366285197, 'samples': 20090368, 'steps': 39238, 'loss/train': 2.0519814491271973} +03/05/2022 11:35:15 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 11:35:20 - INFO - codeparrot_training - Step 39239: {'lr': 0.0004258775922789021, 'samples': 20090880, 'steps': 39239, 'loss/train': 1.9903273582458496} +03/05/2022 11:35:24 - INFO - codeparrot_training - Step 39240: {'lr': 0.0004258738208157045, 'samples': 20091392, 'steps': 39240, 'loss/train': 1.733928918838501} +03/05/2022 11:35:24 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/05/2022 11:35:29 - INFO - codeparrot_training - Step 39241: {'lr': 0.0004258700492732608, 'samples': 20091904, 'steps': 39241, 'loss/train': 1.7905579805374146} +03/05/2022 11:35:32 - INFO - codeparrot_training - Step 39242: {'lr': 0.0004258662776515728, 'samples': 20092416, 'steps': 39242, 'loss/train': 1.8215301036834717} +03/05/2022 11:35:32 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/05/2022 11:35:37 - INFO - codeparrot_training - Step 39243: {'lr': 0.00042586250595064216, 'samples': 20092928, 'steps': 39243, 'loss/train': 2.164107084274292} +03/05/2022 11:35:40 - INFO - codeparrot_training - Step 39244: {'lr': 0.0004258587341704706, 'samples': 20093440, 'steps': 39244, 'loss/train': 1.5649827718734741} +03/05/2022 11:35:41 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 11:35:46 - INFO - codeparrot_training - Step 39245: {'lr': 0.00042585496231105986, 'samples': 20093952, 'steps': 39245, 'loss/train': 1.9379334449768066} +03/05/2022 11:35:49 - INFO - codeparrot_training - Step 39246: {'lr': 0.00042585119037241156, 'samples': 20094464, 'steps': 39246, 'loss/train': 1.90833580493927} +03/05/2022 11:35:49 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 11:35:54 - INFO - codeparrot_training - Step 39247: {'lr': 0.00042584741835452743, 'samples': 20094976, 'steps': 39247, 'loss/train': 1.798171877861023} +03/05/2022 11:35:57 - INFO - codeparrot_training - Step 39248: {'lr': 0.0004258436462574091, 'samples': 20095488, 'steps': 39248, 'loss/train': 2.4061174392700195} +03/05/2022 11:35:58 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 11:36:03 - INFO - codeparrot_training - Step 39249: {'lr': 0.0004258398740810584, 'samples': 20096000, 'steps': 39249, 'loss/train': 1.8006937503814697} +03/05/2022 11:36:06 - INFO - codeparrot_training - Step 39250: {'lr': 0.00042583610182547694, 'samples': 20096512, 'steps': 39250, 'loss/train': 1.4761786460876465} +03/05/2022 11:36:07 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 11:36:11 - INFO - codeparrot_training - Step 39251: {'lr': 0.0004258323294906665, 'samples': 20097024, 'steps': 39251, 'loss/train': 1.3168997764587402} +03/05/2022 11:36:14 - INFO - codeparrot_training - Step 39252: {'lr': 0.00042582855707662864, 'samples': 20097536, 'steps': 39252, 'loss/train': 2.300650119781494} +03/05/2022 11:36:15 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/05/2022 11:36:19 - INFO - codeparrot_training - Step 39253: {'lr': 0.00042582478458336523, 'samples': 20098048, 'steps': 39253, 'loss/train': 1.4646955728530884} +03/05/2022 11:36:23 - INFO - codeparrot_training - Step 39254: {'lr': 0.00042582101201087786, 'samples': 20098560, 'steps': 39254, 'loss/train': 1.3625543117523193} +03/05/2022 11:36:23 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 11:36:28 - INFO - codeparrot_training - Step 39255: {'lr': 0.00042581723935916817, 'samples': 20099072, 'steps': 39255, 'loss/train': 0.42943254113197327} +03/05/2022 11:36:31 - INFO - codeparrot_training - Step 39256: {'lr': 0.00042581346662823804, 'samples': 20099584, 'steps': 39256, 'loss/train': 1.3863340616226196} +03/05/2022 11:36:32 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/05/2022 11:36:36 - INFO - codeparrot_training - Step 39257: {'lr': 0.00042580969381808906, 'samples': 20100096, 'steps': 39257, 'loss/train': 1.898044228553772} +03/05/2022 11:36:39 - INFO - codeparrot_training - Step 39258: {'lr': 0.00042580592092872295, 'samples': 20100608, 'steps': 39258, 'loss/train': 2.029705047607422} +03/05/2022 11:36:40 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 11:36:45 - INFO - codeparrot_training - Step 39259: {'lr': 0.0004258021479601414, 'samples': 20101120, 'steps': 39259, 'loss/train': 0.9702457785606384} +03/05/2022 11:36:48 - INFO - codeparrot_training - Step 39260: {'lr': 0.0004257983749123461, 'samples': 20101632, 'steps': 39260, 'loss/train': 1.1560468673706055} +03/05/2022 11:36:48 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/05/2022 11:36:53 - INFO - codeparrot_training - Step 39261: {'lr': 0.00042579460178533875, 'samples': 20102144, 'steps': 39261, 'loss/train': 1.7540640830993652} +03/05/2022 11:36:56 - INFO - codeparrot_training - Step 39262: {'lr': 0.0004257908285791211, 'samples': 20102656, 'steps': 39262, 'loss/train': 0.9023854732513428} +03/05/2022 11:36:56 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 11:37:01 - INFO - codeparrot_training - Step 39263: {'lr': 0.00042578705529369476, 'samples': 20103168, 'steps': 39263, 'loss/train': 1.1134076118469238} +03/05/2022 11:37:04 - INFO - codeparrot_training - Step 39264: {'lr': 0.00042578328192906153, 'samples': 20103680, 'steps': 39264, 'loss/train': 1.8323874473571777} +03/05/2022 11:37:04 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 11:37:10 - INFO - codeparrot_training - Step 39265: {'lr': 0.00042577950848522305, 'samples': 20104192, 'steps': 39265, 'loss/train': 1.6415808200836182} +03/05/2022 11:37:13 - INFO - codeparrot_training - Step 39266: {'lr': 0.0004257757349621811, 'samples': 20104704, 'steps': 39266, 'loss/train': 1.2471224069595337} +03/05/2022 11:37:13 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/05/2022 11:37:18 - INFO - codeparrot_training - Step 39267: {'lr': 0.0004257719613599372, 'samples': 20105216, 'steps': 39267, 'loss/train': 1.4800223112106323} +03/05/2022 11:37:21 - INFO - codeparrot_training - Step 39268: {'lr': 0.0004257681876784932, 'samples': 20105728, 'steps': 39268, 'loss/train': 1.3436870574951172} +03/05/2022 11:37:21 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 11:37:27 - INFO - codeparrot_training - Step 39269: {'lr': 0.0004257644139178508, 'samples': 20106240, 'steps': 39269, 'loss/train': 1.1068660020828247} +03/05/2022 11:37:30 - INFO - codeparrot_training - Step 39270: {'lr': 0.0004257606400780117, 'samples': 20106752, 'steps': 39270, 'loss/train': 1.8305110931396484} +03/05/2022 11:37:31 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 11:37:36 - INFO - codeparrot_training - Step 39271: {'lr': 0.0004257568661589775, 'samples': 20107264, 'steps': 39271, 'loss/train': 2.0048723220825195} +03/05/2022 11:37:39 - INFO - codeparrot_training - Step 39272: {'lr': 0.00042575309216074997, 'samples': 20107776, 'steps': 39272, 'loss/train': 1.9744149446487427} +03/05/2022 11:37:40 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 11:37:44 - INFO - codeparrot_training - Step 39273: {'lr': 0.00042574931808333095, 'samples': 20108288, 'steps': 39273, 'loss/train': 1.496464729309082} +03/05/2022 11:37:47 - INFO - codeparrot_training - Step 39274: {'lr': 0.0004257455439267218, 'samples': 20108800, 'steps': 39274, 'loss/train': 1.4168360233306885} +03/05/2022 11:37:48 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 11:37:52 - INFO - codeparrot_training - Step 39275: {'lr': 0.00042574176969092454, 'samples': 20109312, 'steps': 39275, 'loss/train': 0.6922250986099243} +03/05/2022 11:37:56 - INFO - codeparrot_training - Step 39276: {'lr': 0.0004257379953759407, 'samples': 20109824, 'steps': 39276, 'loss/train': 1.5159292221069336} +03/05/2022 11:37:57 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/05/2022 11:38:01 - INFO - codeparrot_training - Step 39277: {'lr': 0.00042573422098177204, 'samples': 20110336, 'steps': 39277, 'loss/train': 2.0649304389953613} +03/05/2022 11:38:04 - INFO - codeparrot_training - Step 39278: {'lr': 0.0004257304465084203, 'samples': 20110848, 'steps': 39278, 'loss/train': 1.8487310409545898} +03/05/2022 11:38:05 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 11:38:09 - INFO - codeparrot_training - Step 39279: {'lr': 0.0004257266719558871, 'samples': 20111360, 'steps': 39279, 'loss/train': 1.920003056526184} +03/05/2022 11:38:12 - INFO - codeparrot_training - Step 39280: {'lr': 0.0004257228973241741, 'samples': 20111872, 'steps': 39280, 'loss/train': 1.4649440050125122} +03/05/2022 11:38:13 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 11:38:18 - INFO - codeparrot_training - Step 39281: {'lr': 0.00042571912261328315, 'samples': 20112384, 'steps': 39281, 'loss/train': 2.0862770080566406} +03/05/2022 11:38:21 - INFO - codeparrot_training - Step 39282: {'lr': 0.00042571534782321593, 'samples': 20112896, 'steps': 39282, 'loss/train': 1.8228590488433838} +03/05/2022 11:38:22 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/05/2022 11:38:27 - INFO - codeparrot_training - Step 39283: {'lr': 0.000425711572953974, 'samples': 20113408, 'steps': 39283, 'loss/train': 1.0335910320281982} +03/05/2022 11:38:30 - INFO - codeparrot_training - Step 39284: {'lr': 0.00042570779800555914, 'samples': 20113920, 'steps': 39284, 'loss/train': 0.5983594655990601} +03/05/2022 11:38:32 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 11:38:35 - INFO - codeparrot_training - Step 39285: {'lr': 0.00042570402297797304, 'samples': 20114432, 'steps': 39285, 'loss/train': 2.1930768489837646} +03/05/2022 11:38:38 - INFO - codeparrot_training - Step 39286: {'lr': 0.0004257002478712175, 'samples': 20114944, 'steps': 39286, 'loss/train': 2.088867664337158} +03/05/2022 11:38:41 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 11:38:44 - INFO - codeparrot_training - Step 39287: {'lr': 0.0004256964726852941, 'samples': 20115456, 'steps': 39287, 'loss/train': 1.509057641029358} +03/05/2022 11:38:47 - INFO - codeparrot_training - Step 39288: {'lr': 0.0004256926974202046, 'samples': 20115968, 'steps': 39288, 'loss/train': 1.154793620109558} +03/05/2022 11:38:49 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/05/2022 11:38:52 - INFO - codeparrot_training - Step 39289: {'lr': 0.00042568892207595066, 'samples': 20116480, 'steps': 39289, 'loss/train': 1.3559041023254395} +03/05/2022 11:38:55 - INFO - codeparrot_training - Step 39290: {'lr': 0.000425685146652534, 'samples': 20116992, 'steps': 39290, 'loss/train': 0.9894947409629822} +03/05/2022 11:38:57 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/05/2022 11:39:00 - INFO - codeparrot_training - Step 39291: {'lr': 0.00042568137114995633, 'samples': 20117504, 'steps': 39291, 'loss/train': 1.4956916570663452} +03/05/2022 11:39:04 - INFO - codeparrot_training - Step 39292: {'lr': 0.00042567759556821937, 'samples': 20118016, 'steps': 39292, 'loss/train': 2.0803685188293457} +03/05/2022 11:39:06 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 11:39:09 - INFO - codeparrot_training - Step 39293: {'lr': 0.00042567381990732476, 'samples': 20118528, 'steps': 39293, 'loss/train': 0.6321213841438293} +03/05/2022 11:39:12 - INFO - codeparrot_training - Step 39294: {'lr': 0.0004256700441672743, 'samples': 20119040, 'steps': 39294, 'loss/train': 1.3099424839019775} +03/05/2022 11:39:16 - INFO - codeparrot_training - Step 39295: {'lr': 0.0004256662683480695, 'samples': 20119552, 'steps': 39295, 'loss/train': 2.103403091430664} +03/05/2022 11:39:16 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/05/2022 11:39:21 - INFO - codeparrot_training - Step 39296: {'lr': 0.00042566249244971235, 'samples': 20120064, 'steps': 39296, 'loss/train': 1.986287236213684} +03/05/2022 11:39:24 - INFO - codeparrot_training - Step 39297: {'lr': 0.0004256587164722043, 'samples': 20120576, 'steps': 39297, 'loss/train': 1.5257185697555542} +03/05/2022 11:39:25 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 11:39:29 - INFO - codeparrot_training - Step 39298: {'lr': 0.0004256549404155471, 'samples': 20121088, 'steps': 39298, 'loss/train': 0.9372047781944275} +03/05/2022 11:39:32 - INFO - codeparrot_training - Step 39299: {'lr': 0.0004256511642797426, 'samples': 20121600, 'steps': 39299, 'loss/train': 1.356652855873108} +03/05/2022 11:39:33 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 11:39:38 - INFO - codeparrot_training - Step 39300: {'lr': 0.0004256473880647923, 'samples': 20122112, 'steps': 39300, 'loss/train': 1.2373672723770142} +03/05/2022 11:39:41 - INFO - codeparrot_training - Step 39301: {'lr': 0.0004256436117706981, 'samples': 20122624, 'steps': 39301, 'loss/train': 2.090449571609497} +03/05/2022 11:39:42 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 11:39:46 - INFO - codeparrot_training - Step 39302: {'lr': 0.0004256398353974615, 'samples': 20123136, 'steps': 39302, 'loss/train': 2.259685516357422} +03/05/2022 11:39:49 - INFO - codeparrot_training - Step 39303: {'lr': 0.00042563605894508434, 'samples': 20123648, 'steps': 39303, 'loss/train': 1.4122346639633179} +03/05/2022 11:39:50 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/05/2022 11:39:55 - INFO - codeparrot_training - Step 39304: {'lr': 0.00042563228241356834, 'samples': 20124160, 'steps': 39304, 'loss/train': 1.8796900510787964} +03/05/2022 11:39:58 - INFO - codeparrot_training - Step 39305: {'lr': 0.000425628505802915, 'samples': 20124672, 'steps': 39305, 'loss/train': 1.5217339992523193} +03/05/2022 11:39:59 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/05/2022 11:40:03 - INFO - codeparrot_training - Step 39306: {'lr': 0.0004256247291131263, 'samples': 20125184, 'steps': 39306, 'loss/train': 1.08473539352417} +03/05/2022 11:40:06 - INFO - codeparrot_training - Step 39307: {'lr': 0.00042562095234420375, 'samples': 20125696, 'steps': 39307, 'loss/train': 0.43585866689682007} +03/05/2022 11:40:07 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/05/2022 11:40:11 - INFO - codeparrot_training - Step 39308: {'lr': 0.00042561717549614907, 'samples': 20126208, 'steps': 39308, 'loss/train': 1.2756831645965576} +03/05/2022 11:40:15 - INFO - codeparrot_training - Step 39309: {'lr': 0.0004256133985689641, 'samples': 20126720, 'steps': 39309, 'loss/train': 1.2425265312194824} +03/05/2022 11:40:15 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/05/2022 11:40:20 - INFO - codeparrot_training - Step 39310: {'lr': 0.0004256096215626504, 'samples': 20127232, 'steps': 39310, 'loss/train': 2.195155382156372} +03/05/2022 11:40:23 - INFO - codeparrot_training - Step 39311: {'lr': 0.0004256058444772097, 'samples': 20127744, 'steps': 39311, 'loss/train': 1.2797623872756958} +03/05/2022 11:40:24 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/05/2022 11:40:28 - INFO - codeparrot_training - Step 39312: {'lr': 0.0004256020673126437, 'samples': 20128256, 'steps': 39312, 'loss/train': 1.314859390258789} +03/05/2022 11:40:32 - INFO - codeparrot_training - Step 39313: {'lr': 0.0004255982900689541, 'samples': 20128768, 'steps': 39313, 'loss/train': 2.400352716445923} +03/05/2022 11:40:32 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 11:40:37 - INFO - codeparrot_training - Step 39314: {'lr': 0.0004255945127461427, 'samples': 20129280, 'steps': 39314, 'loss/train': 1.6714900732040405} +03/05/2022 11:40:40 - INFO - codeparrot_training - Step 39315: {'lr': 0.00042559073534421114, 'samples': 20129792, 'steps': 39315, 'loss/train': 1.3838931322097778} +03/05/2022 11:40:41 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 11:40:45 - INFO - codeparrot_training - Step 39316: {'lr': 0.00042558695786316106, 'samples': 20130304, 'steps': 39316, 'loss/train': 1.8757010698318481} +03/05/2022 11:40:48 - INFO - codeparrot_training - Step 39317: {'lr': 0.00042558318030299415, 'samples': 20130816, 'steps': 39317, 'loss/train': 2.2323172092437744} +03/05/2022 11:40:49 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/05/2022 11:40:54 - INFO - codeparrot_training - Step 39318: {'lr': 0.0004255794026637122, 'samples': 20131328, 'steps': 39318, 'loss/train': 1.6905714273452759} +03/05/2022 11:40:57 - INFO - codeparrot_training - Step 39319: {'lr': 0.0004255756249453169, 'samples': 20131840, 'steps': 39319, 'loss/train': 2.1668126583099365} +03/05/2022 11:40:58 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/05/2022 11:41:03 - INFO - codeparrot_training - Step 39320: {'lr': 0.00042557184714780993, 'samples': 20132352, 'steps': 39320, 'loss/train': 1.5185813903808594} +03/05/2022 11:41:06 - INFO - codeparrot_training - Step 39321: {'lr': 0.000425568069271193, 'samples': 20132864, 'steps': 39321, 'loss/train': 2.3960683345794678} +03/05/2022 11:41:07 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 11:41:11 - INFO - codeparrot_training - Step 39322: {'lr': 0.00042556429131546775, 'samples': 20133376, 'steps': 39322, 'loss/train': 2.1479358673095703} +03/05/2022 11:41:14 - INFO - codeparrot_training - Step 39323: {'lr': 0.000425560513280636, 'samples': 20133888, 'steps': 39323, 'loss/train': 1.4379756450653076} +03/05/2022 11:41:15 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 11:41:19 - INFO - codeparrot_training - Step 39324: {'lr': 0.00042555673516669933, 'samples': 20134400, 'steps': 39324, 'loss/train': 1.374849796295166} +03/05/2022 11:41:23 - INFO - codeparrot_training - Step 39325: {'lr': 0.0004255529569736596, 'samples': 20134912, 'steps': 39325, 'loss/train': 1.0687437057495117} +03/05/2022 11:41:24 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/05/2022 11:41:28 - INFO - codeparrot_training - Step 39326: {'lr': 0.0004255491787015183, 'samples': 20135424, 'steps': 39326, 'loss/train': 1.6843093633651733} +03/05/2022 11:41:31 - INFO - codeparrot_training - Step 39327: {'lr': 0.0004255454003502774, 'samples': 20135936, 'steps': 39327, 'loss/train': 6.155123233795166} +03/05/2022 11:41:32 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/05/2022 11:41:36 - INFO - codeparrot_training - Step 39328: {'lr': 0.0004255416219199384, 'samples': 20136448, 'steps': 39328, 'loss/train': 1.7958861589431763} +03/05/2022 11:41:40 - INFO - codeparrot_training - Step 39329: {'lr': 0.0004255378434105029, 'samples': 20136960, 'steps': 39329, 'loss/train': 1.5802282094955444} +03/05/2022 11:41:41 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/05/2022 11:41:45 - INFO - codeparrot_training - Step 39330: {'lr': 0.00042553406482197297, 'samples': 20137472, 'steps': 39330, 'loss/train': 1.2636702060699463} +03/05/2022 11:41:48 - INFO - codeparrot_training - Step 39331: {'lr': 0.00042553028615434997, 'samples': 20137984, 'steps': 39331, 'loss/train': 1.2248021364212036} +03/05/2022 11:41:49 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 11:41:53 - INFO - codeparrot_training - Step 39332: {'lr': 0.0004255265074076358, 'samples': 20138496, 'steps': 39332, 'loss/train': 2.1438958644866943} +03/05/2022 11:41:56 - INFO - codeparrot_training - Step 39333: {'lr': 0.00042552272858183203, 'samples': 20139008, 'steps': 39333, 'loss/train': 1.1343510150909424} +03/05/2022 11:41:58 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/05/2022 11:42:02 - INFO - codeparrot_training - Step 39334: {'lr': 0.0004255189496769405, 'samples': 20139520, 'steps': 39334, 'loss/train': 1.6371914148330688} +03/05/2022 11:42:05 - INFO - codeparrot_training - Step 39335: {'lr': 0.00042551517069296276, 'samples': 20140032, 'steps': 39335, 'loss/train': 1.1965466737747192} +03/05/2022 11:42:06 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 11:42:10 - INFO - codeparrot_training - Step 39336: {'lr': 0.00042551139162990065, 'samples': 20140544, 'steps': 39336, 'loss/train': 1.686252474784851} +03/05/2022 11:42:13 - INFO - codeparrot_training - Step 39337: {'lr': 0.0004255076124877558, 'samples': 20141056, 'steps': 39337, 'loss/train': 1.7307718992233276} +03/05/2022 11:42:15 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/05/2022 11:42:19 - INFO - codeparrot_training - Step 39338: {'lr': 0.0004255038332665299, 'samples': 20141568, 'steps': 39338, 'loss/train': 1.2093846797943115} +03/05/2022 11:42:22 - INFO - codeparrot_training - Step 39339: {'lr': 0.0004255000539662247, 'samples': 20142080, 'steps': 39339, 'loss/train': 2.201260805130005} +03/05/2022 11:42:23 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 11:42:27 - INFO - codeparrot_training - Step 39340: {'lr': 0.0004254962745868419, 'samples': 20142592, 'steps': 39340, 'loss/train': 1.7000435590744019} +03/05/2022 11:42:30 - INFO - codeparrot_training - Step 39341: {'lr': 0.00042549249512838325, 'samples': 20143104, 'steps': 39341, 'loss/train': 1.7388160228729248} +03/05/2022 11:42:31 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 11:42:36 - INFO - codeparrot_training - Step 39342: {'lr': 0.00042548871559085026, 'samples': 20143616, 'steps': 39342, 'loss/train': 1.0788383483886719} +03/05/2022 11:42:39 - INFO - codeparrot_training - Step 39343: {'lr': 0.0004254849359742449, 'samples': 20144128, 'steps': 39343, 'loss/train': 1.23121976852417} +03/05/2022 11:42:41 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/05/2022 11:42:44 - INFO - codeparrot_training - Step 39344: {'lr': 0.0004254811562785686, 'samples': 20144640, 'steps': 39344, 'loss/train': 1.9464435577392578} +03/05/2022 11:42:47 - INFO - codeparrot_training - Step 39345: {'lr': 0.00042547737650382324, 'samples': 20145152, 'steps': 39345, 'loss/train': 1.6882463693618774} +03/05/2022 11:42:49 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/05/2022 11:42:53 - INFO - codeparrot_training - Step 39346: {'lr': 0.0004254735966500105, 'samples': 20145664, 'steps': 39346, 'loss/train': 1.6875114440917969} +03/05/2022 11:42:56 - INFO - codeparrot_training - Step 39347: {'lr': 0.00042546981671713206, 'samples': 20146176, 'steps': 39347, 'loss/train': 1.6980832815170288} +03/05/2022 11:42:58 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/05/2022 11:43:01 - INFO - codeparrot_training - Step 39348: {'lr': 0.0004254660367051896, 'samples': 20146688, 'steps': 39348, 'loss/train': 2.1431195735931396} +03/05/2022 11:43:04 - INFO - codeparrot_training - Step 39349: {'lr': 0.0004254622566141849, 'samples': 20147200, 'steps': 39349, 'loss/train': 1.6874592304229736} +03/05/2022 11:43:06 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 11:43:09 - INFO - codeparrot_training - Step 39350: {'lr': 0.0004254584764441196, 'samples': 20147712, 'steps': 39350, 'loss/train': 1.8927046060562134} +03/05/2022 11:43:13 - INFO - codeparrot_training - Step 39351: {'lr': 0.00042545469619499545, 'samples': 20148224, 'steps': 39351, 'loss/train': 2.6127960681915283} +03/05/2022 11:43:14 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/05/2022 11:43:18 - INFO - codeparrot_training - Step 39352: {'lr': 0.00042545091586681404, 'samples': 20148736, 'steps': 39352, 'loss/train': 1.507682204246521} +03/05/2022 11:43:21 - INFO - codeparrot_training - Step 39353: {'lr': 0.0004254471354595772, 'samples': 20149248, 'steps': 39353, 'loss/train': 2.062753915786743} +03/05/2022 11:43:23 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/05/2022 11:43:26 - INFO - codeparrot_training - Step 39354: {'lr': 0.0004254433549732866, 'samples': 20149760, 'steps': 39354, 'loss/train': 2.1917827129364014} +03/05/2022 11:43:30 - INFO - codeparrot_training - Step 39355: {'lr': 0.0004254395744079439, 'samples': 20150272, 'steps': 39355, 'loss/train': 0.8095563650131226} +03/05/2022 11:43:31 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/05/2022 11:43:35 - INFO - codeparrot_training - Step 39356: {'lr': 0.0004254357937635509, 'samples': 20150784, 'steps': 39356, 'loss/train': 1.4243088960647583} +03/05/2022 11:43:38 - INFO - codeparrot_training - Step 39357: {'lr': 0.00042543201304010914, 'samples': 20151296, 'steps': 39357, 'loss/train': 1.434818983078003} +03/05/2022 11:43:40 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/05/2022 11:43:43 - INFO - codeparrot_training - Step 39358: {'lr': 0.0004254282322376205, 'samples': 20151808, 'steps': 39358, 'loss/train': 2.1282331943511963} +03/05/2022 11:43:46 - INFO - codeparrot_training - Step 39359: {'lr': 0.0004254244513560866, 'samples': 20152320, 'steps': 39359, 'loss/train': 2.13814377784729} +03/05/2022 11:43:48 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 11:43:52 - INFO - codeparrot_training - Step 39360: {'lr': 0.00042542067039550916, 'samples': 20152832, 'steps': 39360, 'loss/train': 2.2471859455108643} +03/05/2022 11:43:55 - INFO - codeparrot_training - Step 39361: {'lr': 0.00042541688935588984, 'samples': 20153344, 'steps': 39361, 'loss/train': 2.309373617172241} +03/05/2022 11:43:56 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/05/2022 11:44:00 - INFO - codeparrot_training - Step 39362: {'lr': 0.00042541310823723035, 'samples': 20153856, 'steps': 39362, 'loss/train': 0.11443085968494415} +03/05/2022 11:44:03 - INFO - codeparrot_training - Step 39363: {'lr': 0.00042540932703953246, 'samples': 20154368, 'steps': 39363, 'loss/train': 1.9737426042556763} +03/05/2022 11:44:04 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 11:44:08 - INFO - codeparrot_training - Step 39364: {'lr': 0.00042540554576279776, 'samples': 20154880, 'steps': 39364, 'loss/train': 2.025761842727661} +03/05/2022 11:44:12 - INFO - codeparrot_training - Step 39365: {'lr': 0.0004254017644070282, 'samples': 20155392, 'steps': 39365, 'loss/train': 1.2585152387619019} +03/05/2022 11:44:13 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/05/2022 11:44:17 - INFO - codeparrot_training - Step 39366: {'lr': 0.0004253979829722251, 'samples': 20155904, 'steps': 39366, 'loss/train': 2.368009090423584} +03/05/2022 11:44:20 - INFO - codeparrot_training - Step 39367: {'lr': 0.00042539420145839055, 'samples': 20156416, 'steps': 39367, 'loss/train': 1.731242060661316} +03/05/2022 11:44:21 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 11:44:25 - INFO - codeparrot_training - Step 39368: {'lr': 0.00042539041986552596, 'samples': 20156928, 'steps': 39368, 'loss/train': 0.7698107957839966} +03/05/2022 11:44:28 - INFO - codeparrot_training - Step 39369: {'lr': 0.00042538663819363323, 'samples': 20157440, 'steps': 39369, 'loss/train': 1.2990303039550781} +03/05/2022 11:44:29 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 11:44:34 - INFO - codeparrot_training - Step 39370: {'lr': 0.000425382856442714, 'samples': 20157952, 'steps': 39370, 'loss/train': 1.7373076677322388} +03/05/2022 11:44:37 - INFO - codeparrot_training - Step 39371: {'lr': 0.0004253790746127699, 'samples': 20158464, 'steps': 39371, 'loss/train': 1.1799730062484741} +03/05/2022 11:44:37 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/05/2022 11:44:42 - INFO - codeparrot_training - Step 39372: {'lr': 0.0004253752927038027, 'samples': 20158976, 'steps': 39372, 'loss/train': 1.6194026470184326} +03/05/2022 11:44:45 - INFO - codeparrot_training - Step 39373: {'lr': 0.0004253715107158141, 'samples': 20159488, 'steps': 39373, 'loss/train': 1.5046091079711914} +03/05/2022 11:44:46 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/05/2022 11:44:51 - INFO - codeparrot_training - Step 39374: {'lr': 0.0004253677286488058, 'samples': 20160000, 'steps': 39374, 'loss/train': 2.1779208183288574} +03/05/2022 11:44:54 - INFO - codeparrot_training - Step 39375: {'lr': 0.00042536394650277953, 'samples': 20160512, 'steps': 39375, 'loss/train': 1.377008080482483} +03/05/2022 11:44:54 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/05/2022 11:44:59 - INFO - codeparrot_training - Step 39376: {'lr': 0.000425360164277737, 'samples': 20161024, 'steps': 39376, 'loss/train': 1.7654107809066772} +03/05/2022 11:45:02 - INFO - codeparrot_training - Step 39377: {'lr': 0.00042535638197367984, 'samples': 20161536, 'steps': 39377, 'loss/train': 1.1904605627059937} +03/05/2022 11:45:02 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 11:45:07 - INFO - codeparrot_training - Step 39378: {'lr': 0.0004253525995906098, 'samples': 20162048, 'steps': 39378, 'loss/train': 1.7329192161560059} +03/05/2022 11:45:10 - INFO - codeparrot_training - Step 39379: {'lr': 0.00042534881712852856, 'samples': 20162560, 'steps': 39379, 'loss/train': 2.0125482082366943} +03/05/2022 11:45:11 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 11:45:16 - INFO - codeparrot_training - Step 39380: {'lr': 0.0004253450345874379, 'samples': 20163072, 'steps': 39380, 'loss/train': 0.6299535632133484} +03/05/2022 11:45:19 - INFO - codeparrot_training - Step 39381: {'lr': 0.00042534125196733955, 'samples': 20163584, 'steps': 39381, 'loss/train': 1.694901466369629} +03/05/2022 11:45:19 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/05/2022 11:45:24 - INFO - codeparrot_training - Step 39382: {'lr': 0.000425337469268235, 'samples': 20164096, 'steps': 39382, 'loss/train': 1.9881478548049927} +03/05/2022 11:45:27 - INFO - codeparrot_training - Step 39383: {'lr': 0.00042533368649012615, 'samples': 20164608, 'steps': 39383, 'loss/train': 1.6406105756759644} +03/05/2022 11:45:27 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 11:45:33 - INFO - codeparrot_training - Step 39384: {'lr': 0.0004253299036330146, 'samples': 20165120, 'steps': 39384, 'loss/train': 1.6849263906478882} +03/05/2022 11:45:36 - INFO - codeparrot_training - Step 39385: {'lr': 0.00042532612069690214, 'samples': 20165632, 'steps': 39385, 'loss/train': 1.8215641975402832} +03/05/2022 11:45:36 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/05/2022 11:45:41 - INFO - codeparrot_training - Step 39386: {'lr': 0.0004253223376817904, 'samples': 20166144, 'steps': 39386, 'loss/train': 1.7656104564666748} +03/05/2022 11:45:44 - INFO - codeparrot_training - Step 39387: {'lr': 0.0004253185545876812, 'samples': 20166656, 'steps': 39387, 'loss/train': 2.0673701763153076} +03/05/2022 11:45:44 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/05/2022 11:45:49 - INFO - codeparrot_training - Step 39388: {'lr': 0.0004253147714145761, 'samples': 20167168, 'steps': 39388, 'loss/train': 1.419786810874939} +03/05/2022 11:45:53 - INFO - codeparrot_training - Step 39389: {'lr': 0.00042531098816247695, 'samples': 20167680, 'steps': 39389, 'loss/train': 1.9667061567306519} +03/05/2022 11:45:53 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 11:45:58 - INFO - codeparrot_training - Step 39390: {'lr': 0.00042530720483138524, 'samples': 20168192, 'steps': 39390, 'loss/train': 1.2702560424804688} +03/05/2022 11:46:01 - INFO - codeparrot_training - Step 39391: {'lr': 0.00042530342142130283, 'samples': 20168704, 'steps': 39391, 'loss/train': 1.3592029809951782} +03/05/2022 11:46:01 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 11:46:06 - INFO - codeparrot_training - Step 39392: {'lr': 0.0004252996379322315, 'samples': 20169216, 'steps': 39392, 'loss/train': 2.2078804969787598} +03/05/2022 11:46:09 - INFO - codeparrot_training - Step 39393: {'lr': 0.0004252958543641728, 'samples': 20169728, 'steps': 39393, 'loss/train': 1.2153912782669067} +03/05/2022 11:46:10 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 11:46:15 - INFO - codeparrot_training - Step 39394: {'lr': 0.0004252920707171285, 'samples': 20170240, 'steps': 39394, 'loss/train': 1.4317548274993896} +03/05/2022 11:46:18 - INFO - codeparrot_training - Step 39395: {'lr': 0.00042528828699110033, 'samples': 20170752, 'steps': 39395, 'loss/train': 1.7917567491531372} +03/05/2022 11:46:18 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/05/2022 11:46:23 - INFO - codeparrot_training - Step 39396: {'lr': 0.0004252845031860899, 'samples': 20171264, 'steps': 39396, 'loss/train': 1.4303711652755737} +03/05/2022 11:46:26 - INFO - codeparrot_training - Step 39397: {'lr': 0.000425280719302099, 'samples': 20171776, 'steps': 39397, 'loss/train': 1.2609963417053223} +03/05/2022 11:46:26 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/05/2022 11:46:32 - INFO - codeparrot_training - Step 39398: {'lr': 0.0004252769353391294, 'samples': 20172288, 'steps': 39398, 'loss/train': 0.9504624605178833} +03/05/2022 11:46:35 - INFO - codeparrot_training - Step 39399: {'lr': 0.00042527315129718257, 'samples': 20172800, 'steps': 39399, 'loss/train': 2.1288347244262695} +03/05/2022 11:46:35 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 11:46:40 - INFO - codeparrot_training - Step 39400: {'lr': 0.00042526936717626046, 'samples': 20173312, 'steps': 39400, 'loss/train': 1.4981889724731445} +03/05/2022 11:46:43 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 11:46:45 - INFO - codeparrot_training - Step 39401: {'lr': 0.00042526558297636464, 'samples': 20173824, 'steps': 39401, 'loss/train': 1.59617280960083} +03/05/2022 11:46:49 - INFO - codeparrot_training - Step 39402: {'lr': 0.0004252617986974969, 'samples': 20174336, 'steps': 39402, 'loss/train': 1.5019707679748535} +03/05/2022 11:46:51 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/05/2022 11:46:54 - INFO - codeparrot_training - Step 39403: {'lr': 0.00042525801433965883, 'samples': 20174848, 'steps': 39403, 'loss/train': 2.0933539867401123} +03/05/2022 11:46:57 - INFO - codeparrot_training - Step 39404: {'lr': 0.00042525422990285225, 'samples': 20175360, 'steps': 39404, 'loss/train': 1.7473613023757935} +03/05/2022 11:47:00 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 11:47:02 - INFO - codeparrot_training - Step 39405: {'lr': 0.0004252504453870788, 'samples': 20175872, 'steps': 39405, 'loss/train': 1.5347460508346558} +03/05/2022 11:47:05 - INFO - codeparrot_training - Step 39406: {'lr': 0.0004252466607923402, 'samples': 20176384, 'steps': 39406, 'loss/train': 1.3275331258773804} +03/05/2022 11:47:08 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 11:47:11 - INFO - codeparrot_training - Step 39407: {'lr': 0.0004252428761186382, 'samples': 20176896, 'steps': 39407, 'loss/train': 1.8536207675933838} +03/05/2022 11:47:14 - INFO - codeparrot_training - Step 39408: {'lr': 0.0004252390913659744, 'samples': 20177408, 'steps': 39408, 'loss/train': 0.9683761596679688} +03/05/2022 11:47:16 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/05/2022 11:47:19 - INFO - codeparrot_training - Step 39409: {'lr': 0.0004252353065343506, 'samples': 20177920, 'steps': 39409, 'loss/train': 1.8689212799072266} +03/05/2022 11:47:22 - INFO - codeparrot_training - Step 39410: {'lr': 0.0004252315216237684, 'samples': 20178432, 'steps': 39410, 'loss/train': 2.4282302856445312} +03/05/2022 11:47:25 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 11:47:27 - INFO - codeparrot_training - Step 39411: {'lr': 0.00042522773663422977, 'samples': 20178944, 'steps': 39411, 'loss/train': 2.317436456680298} +03/05/2022 11:47:31 - INFO - codeparrot_training - Step 39412: {'lr': 0.000425223951565736, 'samples': 20179456, 'steps': 39412, 'loss/train': 1.8731043338775635} +03/05/2022 11:47:33 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/05/2022 11:47:36 - INFO - codeparrot_training - Step 39413: {'lr': 0.0004252201664182892, 'samples': 20179968, 'steps': 39413, 'loss/train': 2.163546562194824} +03/05/2022 11:47:39 - INFO - codeparrot_training - Step 39414: {'lr': 0.0004252163811918909, 'samples': 20180480, 'steps': 39414, 'loss/train': 2.5090956687927246} +03/05/2022 11:47:42 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 11:47:44 - INFO - codeparrot_training - Step 39415: {'lr': 0.00042521259588654264, 'samples': 20180992, 'steps': 39415, 'loss/train': 1.4590951204299927} +03/05/2022 11:47:48 - INFO - codeparrot_training - Step 39416: {'lr': 0.00042520881050224637, 'samples': 20181504, 'steps': 39416, 'loss/train': 1.3714085817337036} +03/05/2022 11:47:50 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/05/2022 11:47:53 - INFO - codeparrot_training - Step 39417: {'lr': 0.0004252050250390037, 'samples': 20182016, 'steps': 39417, 'loss/train': 1.9139841794967651} +03/05/2022 11:47:56 - INFO - codeparrot_training - Step 39418: {'lr': 0.0004252012394968164, 'samples': 20182528, 'steps': 39418, 'loss/train': 1.552201747894287} +03/05/2022 11:47:59 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 11:48:01 - INFO - codeparrot_training - Step 39419: {'lr': 0.0004251974538756861, 'samples': 20183040, 'steps': 39419, 'loss/train': 2.104349374771118} +03/05/2022 11:48:05 - INFO - codeparrot_training - Step 39420: {'lr': 0.00042519366817561453, 'samples': 20183552, 'steps': 39420, 'loss/train': 0.8928677439689636} +03/05/2022 11:48:07 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/05/2022 11:48:10 - INFO - codeparrot_training - Step 39421: {'lr': 0.0004251898823966034, 'samples': 20184064, 'steps': 39421, 'loss/train': 1.7101434469223022} +03/05/2022 11:48:13 - INFO - codeparrot_training - Step 39422: {'lr': 0.00042518609653865444, 'samples': 20184576, 'steps': 39422, 'loss/train': 1.8460896015167236} +03/05/2022 11:48:15 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/05/2022 11:48:18 - INFO - codeparrot_training - Step 39423: {'lr': 0.00042518231060176926, 'samples': 20185088, 'steps': 39423, 'loss/train': 2.041424036026001} +03/05/2022 11:48:22 - INFO - codeparrot_training - Step 39424: {'lr': 0.00042517852458594967, 'samples': 20185600, 'steps': 39424, 'loss/train': 1.5133206844329834} +03/05/2022 11:48:25 - INFO - codeparrot_training - Step 39425: {'lr': 0.00042517473849119734, 'samples': 20186112, 'steps': 39425, 'loss/train': 2.581056594848633} +03/05/2022 11:48:25 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/05/2022 11:48:30 - INFO - codeparrot_training - Step 39426: {'lr': 0.000425170952317514, 'samples': 20186624, 'steps': 39426, 'loss/train': 1.7612334489822388} +03/05/2022 11:48:33 - INFO - codeparrot_training - Step 39427: {'lr': 0.0004251671660649013, 'samples': 20187136, 'steps': 39427, 'loss/train': 1.3517612218856812} +03/05/2022 11:48:33 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 11:48:39 - INFO - codeparrot_training - Step 39428: {'lr': 0.000425163379733361, 'samples': 20187648, 'steps': 39428, 'loss/train': 2.1660208702087402} +03/05/2022 11:48:42 - INFO - codeparrot_training - Step 39429: {'lr': 0.00042515959332289476, 'samples': 20188160, 'steps': 39429, 'loss/train': 4.261512279510498} +03/05/2022 11:48:42 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 11:48:47 - INFO - codeparrot_training - Step 39430: {'lr': 0.0004251558068335043, 'samples': 20188672, 'steps': 39430, 'loss/train': 2.3418679237365723} +03/05/2022 11:48:50 - INFO - codeparrot_training - Step 39431: {'lr': 0.00042515202026519136, 'samples': 20189184, 'steps': 39431, 'loss/train': 1.4312385320663452} +03/05/2022 11:48:51 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/05/2022 11:48:56 - INFO - codeparrot_training - Step 39432: {'lr': 0.00042514823361795764, 'samples': 20189696, 'steps': 39432, 'loss/train': 2.2210516929626465} +03/05/2022 11:48:59 - INFO - codeparrot_training - Step 39433: {'lr': 0.0004251444468918048, 'samples': 20190208, 'steps': 39433, 'loss/train': 2.286592960357666} +03/05/2022 11:48:59 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 11:49:04 - INFO - codeparrot_training - Step 39434: {'lr': 0.0004251406600867346, 'samples': 20190720, 'steps': 39434, 'loss/train': 0.8650291562080383} +03/05/2022 11:49:07 - INFO - codeparrot_training - Step 39435: {'lr': 0.00042513687320274866, 'samples': 20191232, 'steps': 39435, 'loss/train': 2.496098279953003} +03/05/2022 11:49:07 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 11:49:12 - INFO - codeparrot_training - Step 39436: {'lr': 0.0004251330862398488, 'samples': 20191744, 'steps': 39436, 'loss/train': 2.2240986824035645} +03/05/2022 11:49:16 - INFO - codeparrot_training - Step 39437: {'lr': 0.0004251292991980367, 'samples': 20192256, 'steps': 39437, 'loss/train': 1.410704255104065} +03/05/2022 11:49:16 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 11:49:21 - INFO - codeparrot_training - Step 39438: {'lr': 0.000425125512077314, 'samples': 20192768, 'steps': 39438, 'loss/train': 2.734102964401245} +03/05/2022 11:49:24 - INFO - codeparrot_training - Step 39439: {'lr': 0.00042512172487768244, 'samples': 20193280, 'steps': 39439, 'loss/train': 1.113688349723816} +03/05/2022 11:49:24 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 11:49:29 - INFO - codeparrot_training - Step 39440: {'lr': 0.00042511793759914375, 'samples': 20193792, 'steps': 39440, 'loss/train': 1.6318929195404053} +03/05/2022 11:49:32 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/05/2022 11:49:34 - INFO - codeparrot_training - Step 39441: {'lr': 0.0004251141502416996, 'samples': 20194304, 'steps': 39441, 'loss/train': 1.8793796300888062} +03/05/2022 11:49:38 - INFO - codeparrot_training - Step 39442: {'lr': 0.0004251103628053517, 'samples': 20194816, 'steps': 39442, 'loss/train': 1.4843800067901611} +03/05/2022 11:49:40 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/05/2022 11:49:43 - INFO - codeparrot_training - Step 39443: {'lr': 0.0004251065752901018, 'samples': 20195328, 'steps': 39443, 'loss/train': 1.0333086252212524} +03/05/2022 11:49:46 - INFO - codeparrot_training - Step 39444: {'lr': 0.0004251027876959516, 'samples': 20195840, 'steps': 39444, 'loss/train': 1.5853588581085205} +03/05/2022 11:49:49 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/05/2022 11:49:51 - INFO - codeparrot_training - Step 39445: {'lr': 0.0004250990000229028, 'samples': 20196352, 'steps': 39445, 'loss/train': 1.4762051105499268} +03/05/2022 11:49:54 - INFO - codeparrot_training - Step 39446: {'lr': 0.00042509521227095706, 'samples': 20196864, 'steps': 39446, 'loss/train': 1.1447510719299316} +03/05/2022 11:49:57 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/05/2022 11:50:00 - INFO - codeparrot_training - Step 39447: {'lr': 0.0004250914244401161, 'samples': 20197376, 'steps': 39447, 'loss/train': 1.9971071481704712} +03/05/2022 11:50:03 - INFO - codeparrot_training - Step 39448: {'lr': 0.00042508763653038167, 'samples': 20197888, 'steps': 39448, 'loss/train': 1.7649434804916382} +03/05/2022 11:50:06 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 11:50:08 - INFO - codeparrot_training - Step 39449: {'lr': 0.0004250838485417554, 'samples': 20198400, 'steps': 39449, 'loss/train': 1.7123974561691284} +03/05/2022 11:50:11 - INFO - codeparrot_training - Step 39450: {'lr': 0.00042508006047423916, 'samples': 20198912, 'steps': 39450, 'loss/train': 2.8729848861694336} +03/05/2022 11:50:14 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/05/2022 11:50:17 - INFO - codeparrot_training - Step 39451: {'lr': 0.0004250762723278344, 'samples': 20199424, 'steps': 39451, 'loss/train': 1.2746347188949585} +03/05/2022 11:50:20 - INFO - codeparrot_training - Step 39452: {'lr': 0.00042507248410254307, 'samples': 20199936, 'steps': 39452, 'loss/train': 2.23582124710083} +03/05/2022 11:50:22 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/05/2022 11:50:25 - INFO - codeparrot_training - Step 39453: {'lr': 0.0004250686957983668, 'samples': 20200448, 'steps': 39453, 'loss/train': 1.8270596265792847} +03/05/2022 11:50:28 - INFO - codeparrot_training - Step 39454: {'lr': 0.00042506490741530724, 'samples': 20200960, 'steps': 39454, 'loss/train': 2.107948064804077} +03/05/2022 11:50:31 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/05/2022 11:50:33 - INFO - codeparrot_training - Step 39455: {'lr': 0.00042506111895336616, 'samples': 20201472, 'steps': 39455, 'loss/train': 1.8282709121704102} +03/05/2022 11:50:37 - INFO - codeparrot_training - Step 39456: {'lr': 0.00042505733041254526, 'samples': 20201984, 'steps': 39456, 'loss/train': 2.855403184890747} +03/05/2022 11:50:39 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 11:50:42 - INFO - codeparrot_training - Step 39457: {'lr': 0.00042505354179284615, 'samples': 20202496, 'steps': 39457, 'loss/train': 1.669367790222168} +03/05/2022 11:50:46 - INFO - codeparrot_training - Step 39458: {'lr': 0.00042504975309427064, 'samples': 20203008, 'steps': 39458, 'loss/train': 2.22339129447937} +03/05/2022 11:50:49 - INFO - codeparrot_training - Step 39459: {'lr': 0.0004250459643168204, 'samples': 20203520, 'steps': 39459, 'loss/train': 1.855828046798706} +03/05/2022 11:50:50 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 11:50:54 - INFO - codeparrot_training - Step 39460: {'lr': 0.0004250421754604972, 'samples': 20204032, 'steps': 39460, 'loss/train': 1.3507159948349} +03/05/2022 11:50:57 - INFO - codeparrot_training - Step 39461: {'lr': 0.0004250383865253027, 'samples': 20204544, 'steps': 39461, 'loss/train': 1.6053780317306519} +03/05/2022 11:50:59 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/05/2022 11:51:03 - INFO - codeparrot_training - Step 39462: {'lr': 0.00042503459751123854, 'samples': 20205056, 'steps': 39462, 'loss/train': 2.222770929336548} +03/05/2022 11:51:06 - INFO - codeparrot_training - Step 39463: {'lr': 0.00042503080841830654, 'samples': 20205568, 'steps': 39463, 'loss/train': 2.216947078704834} +03/05/2022 11:51:07 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/05/2022 11:51:11 - INFO - codeparrot_training - Step 39464: {'lr': 0.0004250270192465083, 'samples': 20206080, 'steps': 39464, 'loss/train': 1.7197332382202148} +03/05/2022 11:51:14 - INFO - codeparrot_training - Step 39465: {'lr': 0.0004250232299958456, 'samples': 20206592, 'steps': 39465, 'loss/train': 1.95314359664917} +03/05/2022 11:51:16 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/05/2022 11:51:19 - INFO - codeparrot_training - Step 39466: {'lr': 0.0004250194406663203, 'samples': 20207104, 'steps': 39466, 'loss/train': 1.0625832080841064} +03/05/2022 11:51:23 - INFO - codeparrot_training - Step 39467: {'lr': 0.00042501565125793375, 'samples': 20207616, 'steps': 39467, 'loss/train': 1.4129672050476074} +03/05/2022 11:51:28 - INFO - codeparrot_training - Step 39468: {'lr': 0.0004250118617706879, 'samples': 20208128, 'steps': 39468, 'loss/train': 1.4999332427978516} +03/05/2022 11:51:31 - INFO - codeparrot_training - Step 39469: {'lr': 0.0004250080722045844, 'samples': 20208640, 'steps': 39469, 'loss/train': 1.7277971506118774} +03/05/2022 11:51:32 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 11:51:37 - INFO - codeparrot_training - Step 39470: {'lr': 0.000425004282559625, 'samples': 20209152, 'steps': 39470, 'loss/train': 2.629648447036743} +03/05/2022 11:51:40 - INFO - codeparrot_training - Step 39471: {'lr': 0.0004250004928358113, 'samples': 20209664, 'steps': 39471, 'loss/train': 0.9873744249343872} +03/05/2022 11:51:43 - INFO - codeparrot_training - Step 39472: {'lr': 0.0004249967030331451, 'samples': 20210176, 'steps': 39472, 'loss/train': 1.040157675743103} +03/05/2022 11:51:43 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 11:51:48 - INFO - codeparrot_training - Step 39473: {'lr': 0.0004249929131516281, 'samples': 20210688, 'steps': 39473, 'loss/train': 2.040724039077759} +03/05/2022 11:51:51 - INFO - codeparrot_training - Step 39474: {'lr': 0.00042498912319126206, 'samples': 20211200, 'steps': 39474, 'loss/train': 1.758864164352417} +03/05/2022 11:51:52 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/05/2022 11:51:57 - INFO - codeparrot_training - Step 39475: {'lr': 0.00042498533315204855, 'samples': 20211712, 'steps': 39475, 'loss/train': 2.1586499214172363} +03/05/2022 11:52:00 - INFO - codeparrot_training - Step 39476: {'lr': 0.0004249815430339894, 'samples': 20212224, 'steps': 39476, 'loss/train': 1.8622807264328003} +03/05/2022 11:52:00 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/05/2022 11:52:05 - INFO - codeparrot_training - Step 39477: {'lr': 0.0004249777528370862, 'samples': 20212736, 'steps': 39477, 'loss/train': 1.7885797023773193} +03/05/2022 11:52:08 - INFO - codeparrot_training - Step 39478: {'lr': 0.00042497396256134073, 'samples': 20213248, 'steps': 39478, 'loss/train': 1.6299128532409668} +03/05/2022 11:52:09 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/05/2022 11:52:14 - INFO - codeparrot_training - Step 39479: {'lr': 0.0004249701722067547, 'samples': 20213760, 'steps': 39479, 'loss/train': 1.1913502216339111} +03/05/2022 11:52:17 - INFO - codeparrot_training - Step 39480: {'lr': 0.0004249663817733298, 'samples': 20214272, 'steps': 39480, 'loss/train': 2.6169216632843018} +03/05/2022 11:52:18 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 11:52:22 - INFO - codeparrot_training - Step 39481: {'lr': 0.00042496259126106786, 'samples': 20214784, 'steps': 39481, 'loss/train': 2.060624599456787} +03/05/2022 11:52:25 - INFO - codeparrot_training - Step 39482: {'lr': 0.0004249588006699704, 'samples': 20215296, 'steps': 39482, 'loss/train': 1.0312587022781372} +03/05/2022 11:52:26 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 11:52:30 - INFO - codeparrot_training - Step 39483: {'lr': 0.0004249550100000392, 'samples': 20215808, 'steps': 39483, 'loss/train': 2.4456348419189453} +03/05/2022 11:52:34 - INFO - codeparrot_training - Step 39484: {'lr': 0.0004249512192512759, 'samples': 20216320, 'steps': 39484, 'loss/train': 2.171936273574829} +03/05/2022 11:52:35 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/05/2022 11:52:39 - INFO - codeparrot_training - Step 39485: {'lr': 0.0004249474284236824, 'samples': 20216832, 'steps': 39485, 'loss/train': 2.120490789413452} +03/05/2022 11:52:42 - INFO - codeparrot_training - Step 39486: {'lr': 0.0004249436375172602, 'samples': 20217344, 'steps': 39486, 'loss/train': 1.7743561267852783} +03/05/2022 11:52:43 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 11:52:47 - INFO - codeparrot_training - Step 39487: {'lr': 0.0004249398465320111, 'samples': 20217856, 'steps': 39487, 'loss/train': 1.7385261058807373} +03/05/2022 11:52:50 - INFO - codeparrot_training - Step 39488: {'lr': 0.0004249360554679369, 'samples': 20218368, 'steps': 39488, 'loss/train': 1.34467613697052} +03/05/2022 11:52:51 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/05/2022 11:52:56 - INFO - codeparrot_training - Step 39489: {'lr': 0.00042493226432503917, 'samples': 20218880, 'steps': 39489, 'loss/train': 1.5516873598098755} +03/05/2022 11:52:59 - INFO - codeparrot_training - Step 39490: {'lr': 0.00042492847310331963, 'samples': 20219392, 'steps': 39490, 'loss/train': 1.0296729803085327} +03/05/2022 11:53:00 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/05/2022 11:53:04 - INFO - codeparrot_training - Step 39491: {'lr': 0.00042492468180278, 'samples': 20219904, 'steps': 39491, 'loss/train': 1.2881265878677368} +03/05/2022 11:53:07 - INFO - codeparrot_training - Step 39492: {'lr': 0.000424920890423422, 'samples': 20220416, 'steps': 39492, 'loss/train': 0.13477547466754913} +03/05/2022 11:53:08 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 11:53:13 - INFO - codeparrot_training - Step 39493: {'lr': 0.0004249170989652474, 'samples': 20220928, 'steps': 39493, 'loss/train': 1.692677617073059} +03/05/2022 11:53:16 - INFO - codeparrot_training - Step 39494: {'lr': 0.00042491330742825783, 'samples': 20221440, 'steps': 39494, 'loss/train': 1.644914984703064} +03/05/2022 11:53:17 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/05/2022 11:53:21 - INFO - codeparrot_training - Step 39495: {'lr': 0.0004249095158124551, 'samples': 20221952, 'steps': 39495, 'loss/train': 1.5115821361541748} +03/05/2022 11:53:24 - INFO - codeparrot_training - Step 39496: {'lr': 0.0004249057241178407, 'samples': 20222464, 'steps': 39496, 'loss/train': 1.1253011226654053} +03/05/2022 11:53:25 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 11:53:30 - INFO - codeparrot_training - Step 39497: {'lr': 0.00042490193234441656, 'samples': 20222976, 'steps': 39497, 'loss/train': 1.4406445026397705} +03/05/2022 11:53:33 - INFO - codeparrot_training - Step 39498: {'lr': 0.00042489814049218434, 'samples': 20223488, 'steps': 39498, 'loss/train': 1.6908156871795654} +03/05/2022 11:53:33 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/05/2022 11:53:38 - INFO - codeparrot_training - Step 39499: {'lr': 0.00042489434856114565, 'samples': 20224000, 'steps': 39499, 'loss/train': 1.869831919670105} +03/05/2022 11:53:41 - INFO - codeparrot_training - Step 39500: {'lr': 0.00042489055655130226, 'samples': 20224512, 'steps': 39500, 'loss/train': 1.6268796920776367} +03/05/2022 11:53:42 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 11:53:46 - INFO - codeparrot_training - Step 39501: {'lr': 0.00042488676446265596, 'samples': 20225024, 'steps': 39501, 'loss/train': 1.6019048690795898} +03/05/2022 11:53:50 - INFO - codeparrot_training - Step 39502: {'lr': 0.00042488297229520834, 'samples': 20225536, 'steps': 39502, 'loss/train': 1.501145362854004} +03/05/2022 11:53:50 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 11:53:55 - INFO - codeparrot_training - Step 39503: {'lr': 0.00042487918004896117, 'samples': 20226048, 'steps': 39503, 'loss/train': 2.0185112953186035} +03/05/2022 11:53:58 - INFO - codeparrot_training - Step 39504: {'lr': 0.0004248753877239161, 'samples': 20226560, 'steps': 39504, 'loss/train': 1.2451941967010498} +03/05/2022 11:53:59 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/05/2022 11:54:04 - INFO - codeparrot_training - Step 39505: {'lr': 0.0004248715953200749, 'samples': 20227072, 'steps': 39505, 'loss/train': 1.0127921104431152} +03/05/2022 11:54:07 - INFO - codeparrot_training - Step 39506: {'lr': 0.00042486780283743927, 'samples': 20227584, 'steps': 39506, 'loss/train': 1.2920727729797363} +03/05/2022 11:54:07 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 11:54:12 - INFO - codeparrot_training - Step 39507: {'lr': 0.00042486401027601084, 'samples': 20228096, 'steps': 39507, 'loss/train': 1.7099744081497192} +03/05/2022 11:54:15 - INFO - codeparrot_training - Step 39508: {'lr': 0.0004248602176357915, 'samples': 20228608, 'steps': 39508, 'loss/train': 1.0340852737426758} +03/05/2022 11:54:16 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 11:54:20 - INFO - codeparrot_training - Step 39509: {'lr': 0.0004248564249167828, 'samples': 20229120, 'steps': 39509, 'loss/train': 1.9557068347930908} +03/05/2022 11:54:24 - INFO - codeparrot_training - Step 39510: {'lr': 0.00042485263211898647, 'samples': 20229632, 'steps': 39510, 'loss/train': 1.3115252256393433} +03/05/2022 11:54:24 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/05/2022 11:54:29 - INFO - codeparrot_training - Step 39511: {'lr': 0.00042484883924240427, 'samples': 20230144, 'steps': 39511, 'loss/train': 1.6810437440872192} +03/05/2022 11:54:32 - INFO - codeparrot_training - Step 39512: {'lr': 0.0004248450462870378, 'samples': 20230656, 'steps': 39512, 'loss/train': 1.4425575733184814} +03/05/2022 11:54:32 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/05/2022 11:54:37 - INFO - codeparrot_training - Step 39513: {'lr': 0.0004248412532528889, 'samples': 20231168, 'steps': 39513, 'loss/train': 1.3789643049240112} +03/05/2022 11:54:40 - INFO - codeparrot_training - Step 39514: {'lr': 0.00042483746013995924, 'samples': 20231680, 'steps': 39514, 'loss/train': 1.0510631799697876} +03/05/2022 11:54:41 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/05/2022 11:54:46 - INFO - codeparrot_training - Step 39515: {'lr': 0.00042483366694825054, 'samples': 20232192, 'steps': 39515, 'loss/train': 1.113072395324707} +03/05/2022 11:54:49 - INFO - codeparrot_training - Step 39516: {'lr': 0.0004248298736777645, 'samples': 20232704, 'steps': 39516, 'loss/train': 2.0469138622283936} +03/05/2022 11:54:49 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/05/2022 11:54:54 - INFO - codeparrot_training - Step 39517: {'lr': 0.00042482608032850275, 'samples': 20233216, 'steps': 39517, 'loss/train': 1.9167791604995728} +03/05/2022 11:54:57 - INFO - codeparrot_training - Step 39518: {'lr': 0.0004248222869004671, 'samples': 20233728, 'steps': 39518, 'loss/train': 1.6522334814071655} +03/05/2022 11:54:58 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 11:55:03 - INFO - codeparrot_training - Step 39519: {'lr': 0.0004248184933936592, 'samples': 20234240, 'steps': 39519, 'loss/train': 1.650833010673523} +03/05/2022 11:55:06 - INFO - codeparrot_training - Step 39520: {'lr': 0.0004248146998080808, 'samples': 20234752, 'steps': 39520, 'loss/train': 1.7325822114944458} +03/05/2022 11:55:06 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 11:55:11 - INFO - codeparrot_training - Step 39521: {'lr': 0.00042481090614373364, 'samples': 20235264, 'steps': 39521, 'loss/train': 1.1291372776031494} +03/05/2022 11:55:14 - INFO - codeparrot_training - Step 39522: {'lr': 0.00042480711240061933, 'samples': 20235776, 'steps': 39522, 'loss/train': 1.198961615562439} +03/05/2022 11:55:15 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 11:55:19 - INFO - codeparrot_training - Step 39523: {'lr': 0.0004248033185787397, 'samples': 20236288, 'steps': 39523, 'loss/train': 1.5488234758377075} +03/05/2022 11:55:23 - INFO - codeparrot_training - Step 39524: {'lr': 0.00042479952467809623, 'samples': 20236800, 'steps': 39524, 'loss/train': 1.844227910041809} +03/05/2022 11:55:23 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/05/2022 11:55:28 - INFO - codeparrot_training - Step 39525: {'lr': 0.00042479573069869095, 'samples': 20237312, 'steps': 39525, 'loss/train': 1.8904612064361572} +03/05/2022 11:55:31 - INFO - codeparrot_training - Step 39526: {'lr': 0.0004247919366405253, 'samples': 20237824, 'steps': 39526, 'loss/train': 2.221334457397461} +03/05/2022 11:55:31 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 11:55:36 - INFO - codeparrot_training - Step 39527: {'lr': 0.0004247881425036012, 'samples': 20238336, 'steps': 39527, 'loss/train': 1.5650317668914795} +03/05/2022 11:55:40 - INFO - codeparrot_training - Step 39528: {'lr': 0.00042478434828792025, 'samples': 20238848, 'steps': 39528, 'loss/train': 1.0806422233581543} +03/05/2022 11:55:40 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/05/2022 11:55:45 - INFO - codeparrot_training - Step 39529: {'lr': 0.00042478055399348415, 'samples': 20239360, 'steps': 39529, 'loss/train': 1.3216396570205688} +03/05/2022 11:55:48 - INFO - codeparrot_training - Step 39530: {'lr': 0.0004247767596202946, 'samples': 20239872, 'steps': 39530, 'loss/train': 1.5908998250961304} +03/05/2022 11:55:48 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/05/2022 11:55:53 - INFO - codeparrot_training - Step 39531: {'lr': 0.00042477296516835335, 'samples': 20240384, 'steps': 39531, 'loss/train': 1.744179368019104} +03/05/2022 11:55:57 - INFO - codeparrot_training - Step 39532: {'lr': 0.00042476917063766207, 'samples': 20240896, 'steps': 39532, 'loss/train': 1.5387765169143677} +03/05/2022 11:55:57 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 11:56:02 - INFO - codeparrot_training - Step 39533: {'lr': 0.0004247653760282225, 'samples': 20241408, 'steps': 39533, 'loss/train': 1.7099334001541138} +03/05/2022 11:56:05 - INFO - codeparrot_training - Step 39534: {'lr': 0.0004247615813400364, 'samples': 20241920, 'steps': 39534, 'loss/train': 1.4926204681396484} +03/05/2022 11:56:05 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/05/2022 11:56:10 - INFO - codeparrot_training - Step 39535: {'lr': 0.0004247577865731055, 'samples': 20242432, 'steps': 39535, 'loss/train': 1.567651629447937} +03/05/2022 11:56:13 - INFO - codeparrot_training - Step 39536: {'lr': 0.00042475399172743134, 'samples': 20242944, 'steps': 39536, 'loss/train': 1.1760225296020508} +03/05/2022 11:56:14 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/05/2022 11:56:19 - INFO - codeparrot_training - Step 39537: {'lr': 0.0004247501968030157, 'samples': 20243456, 'steps': 39537, 'loss/train': 1.4166630506515503} +03/05/2022 11:56:22 - INFO - codeparrot_training - Step 39538: {'lr': 0.00042474640179986035, 'samples': 20243968, 'steps': 39538, 'loss/train': 1.446031928062439} +03/05/2022 11:56:22 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/05/2022 11:56:27 - INFO - codeparrot_training - Step 39539: {'lr': 0.00042474260671796697, 'samples': 20244480, 'steps': 39539, 'loss/train': 1.656272292137146} +03/05/2022 11:56:30 - INFO - codeparrot_training - Step 39540: {'lr': 0.0004247388115573373, 'samples': 20244992, 'steps': 39540, 'loss/train': 2.3414535522460938} +03/05/2022 11:56:31 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 11:56:36 - INFO - codeparrot_training - Step 39541: {'lr': 0.00042473501631797294, 'samples': 20245504, 'steps': 39541, 'loss/train': 4.964019775390625} +03/05/2022 11:56:39 - INFO - codeparrot_training - Step 39542: {'lr': 0.0004247312209998758, 'samples': 20246016, 'steps': 39542, 'loss/train': 1.6206120252609253} +03/05/2022 11:56:39 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 11:56:44 - INFO - codeparrot_training - Step 39543: {'lr': 0.00042472742560304734, 'samples': 20246528, 'steps': 39543, 'loss/train': 1.6710517406463623} +03/05/2022 11:56:47 - INFO - codeparrot_training - Step 39544: {'lr': 0.00042472363012748947, 'samples': 20247040, 'steps': 39544, 'loss/train': 1.3930811882019043} +03/05/2022 11:56:48 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/05/2022 11:56:53 - INFO - codeparrot_training - Step 39545: {'lr': 0.00042471983457320384, 'samples': 20247552, 'steps': 39545, 'loss/train': 0.7628999352455139} +03/05/2022 11:56:56 - INFO - codeparrot_training - Step 39546: {'lr': 0.00042471603894019206, 'samples': 20248064, 'steps': 39546, 'loss/train': 1.9054125547409058} +03/05/2022 11:56:57 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/05/2022 11:57:01 - INFO - codeparrot_training - Step 39547: {'lr': 0.00042471224322845603, 'samples': 20248576, 'steps': 39547, 'loss/train': 1.6174657344818115} +03/05/2022 11:57:04 - INFO - codeparrot_training - Step 39548: {'lr': 0.00042470844743799734, 'samples': 20249088, 'steps': 39548, 'loss/train': 1.5804929733276367} +03/05/2022 11:57:05 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/05/2022 11:57:10 - INFO - codeparrot_training - Step 39549: {'lr': 0.00042470465156881765, 'samples': 20249600, 'steps': 39549, 'loss/train': 1.9246913194656372} +03/05/2022 11:57:13 - INFO - codeparrot_training - Step 39550: {'lr': 0.00042470085562091887, 'samples': 20250112, 'steps': 39550, 'loss/train': 1.4813987016677856} +03/05/2022 11:57:13 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/05/2022 11:57:18 - INFO - codeparrot_training - Step 39551: {'lr': 0.0004246970595943025, 'samples': 20250624, 'steps': 39551, 'loss/train': 1.9544329643249512} +03/05/2022 11:57:21 - INFO - codeparrot_training - Step 39552: {'lr': 0.0004246932634889703, 'samples': 20251136, 'steps': 39552, 'loss/train': 2.1707763671875} +03/05/2022 11:57:22 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/05/2022 11:57:26 - INFO - codeparrot_training - Step 39553: {'lr': 0.00042468946730492404, 'samples': 20251648, 'steps': 39553, 'loss/train': 2.0041427612304688} +03/05/2022 11:57:30 - INFO - codeparrot_training - Step 39554: {'lr': 0.00042468567104216536, 'samples': 20252160, 'steps': 39554, 'loss/train': 2.112600088119507} +03/05/2022 11:57:30 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 11:57:35 - INFO - codeparrot_training - Step 39555: {'lr': 0.0004246818747006961, 'samples': 20252672, 'steps': 39555, 'loss/train': 1.4566302299499512} +03/05/2022 11:57:38 - INFO - codeparrot_training - Step 39556: {'lr': 0.00042467807828051787, 'samples': 20253184, 'steps': 39556, 'loss/train': 1.1891714334487915} +03/05/2022 11:57:38 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/05/2022 11:57:43 - INFO - codeparrot_training - Step 39557: {'lr': 0.0004246742817816323, 'samples': 20253696, 'steps': 39557, 'loss/train': 0.3847522735595703} +03/05/2022 11:57:47 - INFO - codeparrot_training - Step 39558: {'lr': 0.00042467048520404126, 'samples': 20254208, 'steps': 39558, 'loss/train': 2.243648052215576} +03/05/2022 11:57:52 - INFO - codeparrot_training - Step 39559: {'lr': 0.00042466668854774636, 'samples': 20254720, 'steps': 39559, 'loss/train': 1.6022562980651855} +03/05/2022 11:57:55 - INFO - codeparrot_training - Step 39560: {'lr': 0.00042466289181274943, 'samples': 20255232, 'steps': 39560, 'loss/train': 1.8534235954284668} +03/05/2022 11:57:55 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 11:58:00 - INFO - codeparrot_training - Step 39561: {'lr': 0.00042465909499905206, 'samples': 20255744, 'steps': 39561, 'loss/train': 1.7306753396987915} +03/05/2022 11:58:03 - INFO - codeparrot_training - Step 39562: {'lr': 0.0004246552981066559, 'samples': 20256256, 'steps': 39562, 'loss/train': 1.179840326309204} +03/05/2022 11:58:03 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/05/2022 11:58:09 - INFO - codeparrot_training - Step 39563: {'lr': 0.0004246515011355629, 'samples': 20256768, 'steps': 39563, 'loss/train': 2.085270881652832} +03/05/2022 11:58:12 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/05/2022 11:58:14 - INFO - codeparrot_training - Step 39564: {'lr': 0.0004246477040857746, 'samples': 20257280, 'steps': 39564, 'loss/train': 1.5155251026153564} +03/05/2022 11:58:17 - INFO - codeparrot_training - Step 39565: {'lr': 0.0004246439069572926, 'samples': 20257792, 'steps': 39565, 'loss/train': 1.6817643642425537} +03/05/2022 11:58:20 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/05/2022 11:58:22 - INFO - codeparrot_training - Step 39566: {'lr': 0.00042464010975011893, 'samples': 20258304, 'steps': 39566, 'loss/train': 1.6983777284622192} +03/05/2022 11:58:26 - INFO - codeparrot_training - Step 39567: {'lr': 0.00042463631246425504, 'samples': 20258816, 'steps': 39567, 'loss/train': 1.594313383102417} +03/05/2022 11:58:28 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 11:58:31 - INFO - codeparrot_training - Step 39568: {'lr': 0.0004246325150997027, 'samples': 20259328, 'steps': 39568, 'loss/train': 1.844613790512085} +03/05/2022 11:58:34 - INFO - codeparrot_training - Step 39569: {'lr': 0.0004246287176564637, 'samples': 20259840, 'steps': 39569, 'loss/train': 3.7851123809814453} +03/05/2022 11:58:37 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 11:58:39 - INFO - codeparrot_training - Step 39570: {'lr': 0.0004246249201345397, 'samples': 20260352, 'steps': 39570, 'loss/train': 1.89006769657135} +03/05/2022 11:58:43 - INFO - codeparrot_training - Step 39571: {'lr': 0.0004246211225339323, 'samples': 20260864, 'steps': 39571, 'loss/train': 2.365574598312378} +03/05/2022 11:58:45 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 11:58:48 - INFO - codeparrot_training - Step 39572: {'lr': 0.0004246173248546434, 'samples': 20261376, 'steps': 39572, 'loss/train': 1.5291794538497925} +03/05/2022 11:58:51 - INFO - codeparrot_training - Step 39573: {'lr': 0.0004246135270966747, 'samples': 20261888, 'steps': 39573, 'loss/train': 1.8303419351577759} +03/05/2022 11:58:54 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 11:58:56 - INFO - codeparrot_training - Step 39574: {'lr': 0.00042460972926002774, 'samples': 20262400, 'steps': 39574, 'loss/train': 1.794812560081482} +03/05/2022 11:59:00 - INFO - codeparrot_training - Step 39575: {'lr': 0.00042460593134470426, 'samples': 20262912, 'steps': 39575, 'loss/train': 1.5292181968688965} +03/05/2022 11:59:02 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/05/2022 11:59:05 - INFO - codeparrot_training - Step 39576: {'lr': 0.0004246021333507062, 'samples': 20263424, 'steps': 39576, 'loss/train': 2.067171812057495} +03/05/2022 11:59:08 - INFO - codeparrot_training - Step 39577: {'lr': 0.00042459833527803503, 'samples': 20263936, 'steps': 39577, 'loss/train': 1.3167665004730225} +03/05/2022 11:59:11 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/05/2022 11:59:13 - INFO - codeparrot_training - Step 39578: {'lr': 0.00042459453712669255, 'samples': 20264448, 'steps': 39578, 'loss/train': 1.8588565587997437} +03/05/2022 11:59:16 - INFO - codeparrot_training - Step 39579: {'lr': 0.0004245907388966804, 'samples': 20264960, 'steps': 39579, 'loss/train': 1.0150758028030396} +03/05/2022 11:59:19 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 11:59:22 - INFO - codeparrot_training - Step 39580: {'lr': 0.0004245869405880005, 'samples': 20265472, 'steps': 39580, 'loss/train': 1.5160540342330933} +03/05/2022 11:59:25 - INFO - codeparrot_training - Step 39581: {'lr': 0.0004245831422006543, 'samples': 20265984, 'steps': 39581, 'loss/train': 1.8087637424468994} +03/05/2022 11:59:27 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/05/2022 11:59:30 - INFO - codeparrot_training - Step 39582: {'lr': 0.0004245793437346437, 'samples': 20266496, 'steps': 39582, 'loss/train': 1.4180867671966553} +03/05/2022 11:59:33 - INFO - codeparrot_training - Step 39583: {'lr': 0.0004245755451899703, 'samples': 20267008, 'steps': 39583, 'loss/train': 1.6178076267242432} +03/05/2022 11:59:35 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 11:59:38 - INFO - codeparrot_training - Step 39584: {'lr': 0.0004245717465666359, 'samples': 20267520, 'steps': 39584, 'loss/train': 1.2121411561965942} +03/05/2022 11:59:42 - INFO - codeparrot_training - Step 39585: {'lr': 0.0004245679478646421, 'samples': 20268032, 'steps': 39585, 'loss/train': 2.4329044818878174} +03/05/2022 11:59:44 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/05/2022 11:59:47 - INFO - codeparrot_training - Step 39586: {'lr': 0.00042456414908399075, 'samples': 20268544, 'steps': 39586, 'loss/train': 3.139448404312134} +03/05/2022 11:59:50 - INFO - codeparrot_training - Step 39587: {'lr': 0.00042456035022468344, 'samples': 20269056, 'steps': 39587, 'loss/train': 1.6371091604232788} +03/05/2022 11:59:52 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 11:59:55 - INFO - codeparrot_training - Step 39588: {'lr': 0.0004245565512867219, 'samples': 20269568, 'steps': 39588, 'loss/train': 0.5925611853599548} +03/05/2022 11:59:58 - INFO - codeparrot_training - Step 39589: {'lr': 0.000424552752270108, 'samples': 20270080, 'steps': 39589, 'loss/train': 1.1517635583877563} +03/05/2022 12:00:00 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 12:00:04 - INFO - codeparrot_training - Step 39590: {'lr': 0.0004245489531748432, 'samples': 20270592, 'steps': 39590, 'loss/train': 1.3761332035064697} +03/05/2022 12:00:07 - INFO - codeparrot_training - Step 39591: {'lr': 0.00042454515400092944, 'samples': 20271104, 'steps': 39591, 'loss/train': 3.053755521774292} +03/05/2022 12:00:09 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 12:00:12 - INFO - codeparrot_training - Step 39592: {'lr': 0.00042454135474836817, 'samples': 20271616, 'steps': 39592, 'loss/train': 0.9860255122184753} +03/05/2022 12:00:15 - INFO - codeparrot_training - Step 39593: {'lr': 0.0004245375554171613, 'samples': 20272128, 'steps': 39593, 'loss/train': 1.7060869932174683} +03/05/2022 12:00:17 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 12:00:21 - INFO - codeparrot_training - Step 39594: {'lr': 0.00042453375600731057, 'samples': 20272640, 'steps': 39594, 'loss/train': 1.8741410970687866} +03/05/2022 12:00:24 - INFO - codeparrot_training - Step 39595: {'lr': 0.00042452995651881764, 'samples': 20273152, 'steps': 39595, 'loss/train': 1.7867883443832397} +03/05/2022 12:00:26 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 12:00:29 - INFO - codeparrot_training - Step 39596: {'lr': 0.0004245261569516842, 'samples': 20273664, 'steps': 39596, 'loss/train': 1.765379548072815} +03/05/2022 12:00:32 - INFO - codeparrot_training - Step 39597: {'lr': 0.00042452235730591195, 'samples': 20274176, 'steps': 39597, 'loss/train': 2.8699889183044434} +03/05/2022 12:00:34 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/05/2022 12:00:38 - INFO - codeparrot_training - Step 39598: {'lr': 0.00042451855758150254, 'samples': 20274688, 'steps': 39598, 'loss/train': 1.326201319694519} +03/05/2022 12:00:41 - INFO - codeparrot_training - Step 39599: {'lr': 0.00042451475777845784, 'samples': 20275200, 'steps': 39599, 'loss/train': 2.5599732398986816} +03/05/2022 12:00:45 - INFO - codeparrot_training - Step 39600: {'lr': 0.00042451095789677943, 'samples': 20275712, 'steps': 39600, 'loss/train': 0.4435458779335022} +03/05/2022 12:00:45 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/05/2022 12:00:50 - INFO - codeparrot_training - Step 39601: {'lr': 0.0004245071579364691, 'samples': 20276224, 'steps': 39601, 'loss/train': 1.8608115911483765} +03/05/2022 12:00:53 - INFO - codeparrot_training - Step 39602: {'lr': 0.0004245033578975286, 'samples': 20276736, 'steps': 39602, 'loss/train': 1.898787498474121} +03/05/2022 12:00:54 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/05/2022 12:00:58 - INFO - codeparrot_training - Step 39603: {'lr': 0.00042449955777995954, 'samples': 20277248, 'steps': 39603, 'loss/train': 1.3378511667251587} +03/05/2022 12:01:01 - INFO - codeparrot_training - Step 39604: {'lr': 0.0004244957575837636, 'samples': 20277760, 'steps': 39604, 'loss/train': 1.4231407642364502} +03/05/2022 12:01:02 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/05/2022 12:01:07 - INFO - codeparrot_training - Step 39605: {'lr': 0.00042449195730894266, 'samples': 20278272, 'steps': 39605, 'loss/train': 1.2702012062072754} +03/05/2022 12:01:10 - INFO - codeparrot_training - Step 39606: {'lr': 0.00042448815695549823, 'samples': 20278784, 'steps': 39606, 'loss/train': 1.491012692451477} +03/05/2022 12:01:11 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/05/2022 12:01:15 - INFO - codeparrot_training - Step 39607: {'lr': 0.00042448435652343223, 'samples': 20279296, 'steps': 39607, 'loss/train': 0.8863868713378906} +03/05/2022 12:01:18 - INFO - codeparrot_training - Step 39608: {'lr': 0.0004244805560127463, 'samples': 20279808, 'steps': 39608, 'loss/train': 1.8586546182632446} +03/05/2022 12:01:19 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/05/2022 12:01:23 - INFO - codeparrot_training - Step 39609: {'lr': 0.00042447675542344203, 'samples': 20280320, 'steps': 39609, 'loss/train': 2.4568965435028076} +03/05/2022 12:01:27 - INFO - codeparrot_training - Step 39610: {'lr': 0.0004244729547555213, 'samples': 20280832, 'steps': 39610, 'loss/train': 1.5476725101470947} +03/05/2022 12:01:27 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/05/2022 12:01:32 - INFO - codeparrot_training - Step 39611: {'lr': 0.00042446915400898565, 'samples': 20281344, 'steps': 39611, 'loss/train': 1.7416270971298218} +03/05/2022 12:01:35 - INFO - codeparrot_training - Step 39612: {'lr': 0.00042446535318383695, 'samples': 20281856, 'steps': 39612, 'loss/train': 1.463618516921997} +03/05/2022 12:01:35 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 12:01:40 - INFO - codeparrot_training - Step 39613: {'lr': 0.00042446155228007687, 'samples': 20282368, 'steps': 39613, 'loss/train': 1.0853447914123535} +03/05/2022 12:01:43 - INFO - codeparrot_training - Step 39614: {'lr': 0.0004244577512977071, 'samples': 20282880, 'steps': 39614, 'loss/train': 2.3059446811676025} +03/05/2022 12:01:44 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/05/2022 12:01:49 - INFO - codeparrot_training - Step 39615: {'lr': 0.00042445395023672935, 'samples': 20283392, 'steps': 39615, 'loss/train': 1.810790777206421} +03/05/2022 12:01:52 - INFO - codeparrot_training - Step 39616: {'lr': 0.0004244501490971454, 'samples': 20283904, 'steps': 39616, 'loss/train': 1.2429416179656982} +03/05/2022 12:01:53 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 12:01:57 - INFO - codeparrot_training - Step 39617: {'lr': 0.0004244463478789568, 'samples': 20284416, 'steps': 39617, 'loss/train': 0.4531399607658386} +03/05/2022 12:02:00 - INFO - codeparrot_training - Step 39618: {'lr': 0.0004244425465821654, 'samples': 20284928, 'steps': 39618, 'loss/train': 1.4985302686691284} +03/05/2022 12:02:01 - INFO - codeparrot_training - Skipping example with length 255 (seq_length=1024) +03/05/2022 12:02:06 - INFO - codeparrot_training - Step 39619: {'lr': 0.0004244387452067729, 'samples': 20285440, 'steps': 39619, 'loss/train': 2.573594570159912} +03/05/2022 12:02:09 - INFO - codeparrot_training - Step 39620: {'lr': 0.000424434943752781, 'samples': 20285952, 'steps': 39620, 'loss/train': 1.0906153917312622} +03/05/2022 12:02:10 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 12:02:14 - INFO - codeparrot_training - Step 39621: {'lr': 0.0004244311422201914, 'samples': 20286464, 'steps': 39621, 'loss/train': 1.4890276193618774} +03/05/2022 12:02:17 - INFO - codeparrot_training - Step 39622: {'lr': 0.0004244273406090058, 'samples': 20286976, 'steps': 39622, 'loss/train': 1.946203351020813} +03/05/2022 12:02:18 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/05/2022 12:02:22 - INFO - codeparrot_training - Step 39623: {'lr': 0.000424423538919226, 'samples': 20287488, 'steps': 39623, 'loss/train': 0.9325276017189026} +03/05/2022 12:02:26 - INFO - codeparrot_training - Step 39624: {'lr': 0.0004244197371508536, 'samples': 20288000, 'steps': 39624, 'loss/train': 1.359408974647522} +03/05/2022 12:02:26 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/05/2022 12:02:31 - INFO - codeparrot_training - Step 39625: {'lr': 0.00042441593530389025, 'samples': 20288512, 'steps': 39625, 'loss/train': 1.4853960275650024} +03/05/2022 12:02:34 - INFO - codeparrot_training - Step 39626: {'lr': 0.0004244121333783379, 'samples': 20289024, 'steps': 39626, 'loss/train': 1.8192424774169922} +03/05/2022 12:02:35 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 12:02:39 - INFO - codeparrot_training - Step 39627: {'lr': 0.0004244083313741981, 'samples': 20289536, 'steps': 39627, 'loss/train': 1.5496728420257568} +03/05/2022 12:02:43 - INFO - codeparrot_training - Step 39628: {'lr': 0.0004244045292914726, 'samples': 20290048, 'steps': 39628, 'loss/train': 1.4731743335723877} +03/05/2022 12:02:43 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 12:02:48 - INFO - codeparrot_training - Step 39629: {'lr': 0.00042440072713016317, 'samples': 20290560, 'steps': 39629, 'loss/train': 2.234647750854492} +03/05/2022 12:02:51 - INFO - codeparrot_training - Step 39630: {'lr': 0.00042439692489027136, 'samples': 20291072, 'steps': 39630, 'loss/train': 1.787954330444336} +03/05/2022 12:02:51 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/05/2022 12:02:56 - INFO - codeparrot_training - Step 39631: {'lr': 0.000424393122571799, 'samples': 20291584, 'steps': 39631, 'loss/train': 1.6617506742477417} +03/05/2022 12:02:59 - INFO - codeparrot_training - Step 39632: {'lr': 0.00042438932017474783, 'samples': 20292096, 'steps': 39632, 'loss/train': 1.6118909120559692} +03/05/2022 12:03:00 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 12:03:05 - INFO - codeparrot_training - Step 39633: {'lr': 0.0004243855176991195, 'samples': 20292608, 'steps': 39633, 'loss/train': 1.5251413583755493} +03/05/2022 12:03:08 - INFO - codeparrot_training - Step 39634: {'lr': 0.0004243817151449158, 'samples': 20293120, 'steps': 39634, 'loss/train': 1.9425326585769653} +03/05/2022 12:03:09 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/05/2022 12:03:13 - INFO - codeparrot_training - Step 39635: {'lr': 0.0004243779125121383, 'samples': 20293632, 'steps': 39635, 'loss/train': 2.352893352508545} +03/05/2022 12:03:16 - INFO - codeparrot_training - Step 39636: {'lr': 0.00042437410980078894, 'samples': 20294144, 'steps': 39636, 'loss/train': 1.277471899986267} +03/05/2022 12:03:17 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/05/2022 12:03:22 - INFO - codeparrot_training - Step 39637: {'lr': 0.0004243703070108692, 'samples': 20294656, 'steps': 39637, 'loss/train': 2.8633956909179688} +03/05/2022 12:03:25 - INFO - codeparrot_training - Step 39638: {'lr': 0.00042436650414238086, 'samples': 20295168, 'steps': 39638, 'loss/train': 1.0687415599822998} +03/05/2022 12:03:25 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/05/2022 12:03:30 - INFO - codeparrot_training - Step 39639: {'lr': 0.0004243627011953257, 'samples': 20295680, 'steps': 39639, 'loss/train': 1.515476942062378} +03/05/2022 12:03:33 - INFO - codeparrot_training - Step 39640: {'lr': 0.0004243588981697054, 'samples': 20296192, 'steps': 39640, 'loss/train': 6.564723968505859} +03/05/2022 12:03:35 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/05/2022 12:03:39 - INFO - codeparrot_training - Step 39641: {'lr': 0.0004243550950655217, 'samples': 20296704, 'steps': 39641, 'loss/train': 1.58370840549469} +03/05/2022 12:03:42 - INFO - codeparrot_training - Step 39642: {'lr': 0.00042435129188277625, 'samples': 20297216, 'steps': 39642, 'loss/train': 1.7251659631729126} +03/05/2022 12:03:43 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/05/2022 12:03:47 - INFO - codeparrot_training - Step 39643: {'lr': 0.0004243474886214708, 'samples': 20297728, 'steps': 39643, 'loss/train': 1.701038122177124} +03/05/2022 12:03:51 - INFO - codeparrot_training - Step 39644: {'lr': 0.0004243436852816071, 'samples': 20298240, 'steps': 39644, 'loss/train': 1.881474494934082} +03/05/2022 12:03:52 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/05/2022 12:03:56 - INFO - codeparrot_training - Step 39645: {'lr': 0.0004243398818631868, 'samples': 20298752, 'steps': 39645, 'loss/train': 2.44671368598938} +03/05/2022 12:03:59 - INFO - codeparrot_training - Step 39646: {'lr': 0.0004243360783662116, 'samples': 20299264, 'steps': 39646, 'loss/train': 1.7211681604385376} +03/05/2022 12:04:01 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 12:04:04 - INFO - codeparrot_training - Step 39647: {'lr': 0.0004243322747906833, 'samples': 20299776, 'steps': 39647, 'loss/train': 1.1366709470748901} +03/05/2022 12:04:07 - INFO - codeparrot_training - Step 39648: {'lr': 0.00042432847113660355, 'samples': 20300288, 'steps': 39648, 'loss/train': 1.6064825057983398} +03/05/2022 12:04:09 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 12:04:13 - INFO - codeparrot_training - Step 39649: {'lr': 0.0004243246674039741, 'samples': 20300800, 'steps': 39649, 'loss/train': 0.4563363492488861} +03/05/2022 12:04:16 - INFO - codeparrot_training - Step 39650: {'lr': 0.00042432086359279667, 'samples': 20301312, 'steps': 39650, 'loss/train': 1.8283206224441528} +03/05/2022 12:04:18 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/05/2022 12:04:21 - INFO - codeparrot_training - Step 39651: {'lr': 0.0004243170597030729, 'samples': 20301824, 'steps': 39651, 'loss/train': 1.2380365133285522} +03/05/2022 12:04:25 - INFO - codeparrot_training - Step 39652: {'lr': 0.0004243132557348045, 'samples': 20302336, 'steps': 39652, 'loss/train': 1.6816068887710571} +03/05/2022 12:04:27 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 12:04:30 - INFO - codeparrot_training - Step 39653: {'lr': 0.00042430945168799326, 'samples': 20302848, 'steps': 39653, 'loss/train': 1.7905917167663574} +03/05/2022 12:04:33 - INFO - codeparrot_training - Step 39654: {'lr': 0.000424305647562641, 'samples': 20303360, 'steps': 39654, 'loss/train': 1.890445590019226} +03/05/2022 12:04:35 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 12:04:38 - INFO - codeparrot_training - Step 39655: {'lr': 0.00042430184335874924, 'samples': 20303872, 'steps': 39655, 'loss/train': 2.282672643661499} +03/05/2022 12:04:41 - INFO - codeparrot_training - Step 39656: {'lr': 0.0004242980390763197, 'samples': 20304384, 'steps': 39656, 'loss/train': 1.4861336946487427} +03/05/2022 12:04:43 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 12:04:47 - INFO - codeparrot_training - Step 39657: {'lr': 0.0004242942347153542, 'samples': 20304896, 'steps': 39657, 'loss/train': 1.4821876287460327} +03/05/2022 12:04:50 - INFO - codeparrot_training - Step 39658: {'lr': 0.00042429043027585435, 'samples': 20305408, 'steps': 39658, 'loss/train': 1.8780018091201782} +03/05/2022 12:04:52 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/05/2022 12:04:55 - INFO - codeparrot_training - Step 39659: {'lr': 0.000424286625757822, 'samples': 20305920, 'steps': 39659, 'loss/train': 1.7281097173690796} +03/05/2022 12:04:58 - INFO - codeparrot_training - Step 39660: {'lr': 0.00042428282116125873, 'samples': 20306432, 'steps': 39660, 'loss/train': 1.4897396564483643} +03/05/2022 12:05:01 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 12:05:03 - INFO - codeparrot_training - Step 39661: {'lr': 0.0004242790164861663, 'samples': 20306944, 'steps': 39661, 'loss/train': 1.6603518724441528} +03/05/2022 12:05:07 - INFO - codeparrot_training - Step 39662: {'lr': 0.0004242752117325465, 'samples': 20307456, 'steps': 39662, 'loss/train': 1.2492740154266357} +03/05/2022 12:05:09 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/05/2022 12:05:12 - INFO - codeparrot_training - Step 39663: {'lr': 0.000424271406900401, 'samples': 20307968, 'steps': 39663, 'loss/train': 1.711969017982483} +03/05/2022 12:05:15 - INFO - codeparrot_training - Step 39664: {'lr': 0.0004242676019897314, 'samples': 20308480, 'steps': 39664, 'loss/train': 1.6771366596221924} +03/05/2022 12:05:17 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 12:05:20 - INFO - codeparrot_training - Step 39665: {'lr': 0.00042426379700053954, 'samples': 20308992, 'steps': 39665, 'loss/train': 1.3521363735198975} +03/05/2022 12:05:23 - INFO - codeparrot_training - Step 39666: {'lr': 0.00042425999193282713, 'samples': 20309504, 'steps': 39666, 'loss/train': 1.3631165027618408} +03/05/2022 12:05:26 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 12:05:29 - INFO - codeparrot_training - Step 39667: {'lr': 0.0004242561867865958, 'samples': 20310016, 'steps': 39667, 'loss/train': 1.9747333526611328} +03/05/2022 12:05:32 - INFO - codeparrot_training - Step 39668: {'lr': 0.0004242523815618473, 'samples': 20310528, 'steps': 39668, 'loss/train': 2.31093692779541} +03/05/2022 12:05:34 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/05/2022 12:05:37 - INFO - codeparrot_training - Step 39669: {'lr': 0.0004242485762585835, 'samples': 20311040, 'steps': 39669, 'loss/train': 1.7373601198196411} +03/05/2022 12:05:40 - INFO - codeparrot_training - Step 39670: {'lr': 0.0004242447708768059, 'samples': 20311552, 'steps': 39670, 'loss/train': 1.2160422801971436} +03/05/2022 12:05:42 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/05/2022 12:05:46 - INFO - codeparrot_training - Step 39671: {'lr': 0.0004242409654165163, 'samples': 20312064, 'steps': 39671, 'loss/train': 1.7019891738891602} +03/05/2022 12:05:49 - INFO - codeparrot_training - Step 39672: {'lr': 0.00042423715987771637, 'samples': 20312576, 'steps': 39672, 'loss/train': 1.844173789024353} +03/05/2022 12:05:51 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/05/2022 12:05:54 - INFO - codeparrot_training - Step 39673: {'lr': 0.0004242333542604079, 'samples': 20313088, 'steps': 39673, 'loss/train': 0.939050018787384} +03/05/2022 12:05:57 - INFO - codeparrot_training - Step 39674: {'lr': 0.0004242295485645926, 'samples': 20313600, 'steps': 39674, 'loss/train': 2.218341827392578} +03/05/2022 12:05:59 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 12:06:03 - INFO - codeparrot_training - Step 39675: {'lr': 0.0004242257427902721, 'samples': 20314112, 'steps': 39675, 'loss/train': 1.836511492729187} +03/05/2022 12:06:06 - INFO - codeparrot_training - Step 39676: {'lr': 0.00042422193693744827, 'samples': 20314624, 'steps': 39676, 'loss/train': 1.3674324750900269} +03/05/2022 12:06:08 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/05/2022 12:06:11 - INFO - codeparrot_training - Step 39677: {'lr': 0.0004242181310061226, 'samples': 20315136, 'steps': 39677, 'loss/train': 1.8510347604751587} +03/05/2022 12:06:14 - INFO - codeparrot_training - Step 39678: {'lr': 0.000424214324996297, 'samples': 20315648, 'steps': 39678, 'loss/train': 1.9550224542617798} +03/05/2022 12:06:16 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 12:06:19 - INFO - codeparrot_training - Step 39679: {'lr': 0.000424210518907973, 'samples': 20316160, 'steps': 39679, 'loss/train': 1.650903582572937} +03/05/2022 12:06:23 - INFO - codeparrot_training - Step 39680: {'lr': 0.0004242067127411525, 'samples': 20316672, 'steps': 39680, 'loss/train': 2.426980495452881} +03/05/2022 12:06:24 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 12:06:28 - INFO - codeparrot_training - Step 39681: {'lr': 0.0004242029064958372, 'samples': 20317184, 'steps': 39681, 'loss/train': 1.849934458732605} +03/05/2022 12:06:31 - INFO - codeparrot_training - Step 39682: {'lr': 0.0004241991001720287, 'samples': 20317696, 'steps': 39682, 'loss/train': 2.3181679248809814} +03/05/2022 12:06:33 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/05/2022 12:06:36 - INFO - codeparrot_training - Step 39683: {'lr': 0.00042419529376972885, 'samples': 20318208, 'steps': 39683, 'loss/train': 1.6174503564834595} +03/05/2022 12:06:39 - INFO - codeparrot_training - Step 39684: {'lr': 0.0004241914872889392, 'samples': 20318720, 'steps': 39684, 'loss/train': 1.1796391010284424} +03/05/2022 12:06:41 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/05/2022 12:06:45 - INFO - codeparrot_training - Step 39685: {'lr': 0.00042418768072966163, 'samples': 20319232, 'steps': 39685, 'loss/train': 1.582139253616333} +03/05/2022 12:06:48 - INFO - codeparrot_training - Step 39686: {'lr': 0.0004241838740918977, 'samples': 20319744, 'steps': 39686, 'loss/train': 1.881881833076477} +03/05/2022 12:06:50 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 12:06:53 - INFO - codeparrot_training - Step 39687: {'lr': 0.00042418006737564924, 'samples': 20320256, 'steps': 39687, 'loss/train': 1.5180909633636475} +03/05/2022 12:06:56 - INFO - codeparrot_training - Step 39688: {'lr': 0.0004241762605809179, 'samples': 20320768, 'steps': 39688, 'loss/train': 2.111886739730835} +03/05/2022 12:06:58 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 12:07:02 - INFO - codeparrot_training - Step 39689: {'lr': 0.00042417245370770547, 'samples': 20321280, 'steps': 39689, 'loss/train': 1.8964176177978516} +03/05/2022 12:07:05 - INFO - codeparrot_training - Step 39690: {'lr': 0.00042416864675601365, 'samples': 20321792, 'steps': 39690, 'loss/train': 1.6332628726959229} +03/05/2022 12:07:07 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 12:07:10 - INFO - codeparrot_training - Step 39691: {'lr': 0.0004241648397258441, 'samples': 20322304, 'steps': 39691, 'loss/train': 1.363561749458313} +03/05/2022 12:07:14 - INFO - codeparrot_training - Step 39692: {'lr': 0.0004241610326171985, 'samples': 20322816, 'steps': 39692, 'loss/train': 1.6554824113845825} +03/05/2022 12:07:16 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 12:07:19 - INFO - codeparrot_training - Step 39693: {'lr': 0.0004241572254300786, 'samples': 20323328, 'steps': 39693, 'loss/train': 1.4589018821716309} +03/05/2022 12:07:22 - INFO - codeparrot_training - Step 39694: {'lr': 0.00042415341816448625, 'samples': 20323840, 'steps': 39694, 'loss/train': 1.7236828804016113} +03/05/2022 12:07:24 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 12:07:27 - INFO - codeparrot_training - Step 39695: {'lr': 0.000424149610820423, 'samples': 20324352, 'steps': 39695, 'loss/train': 2.154867172241211} +03/05/2022 12:07:31 - INFO - codeparrot_training - Step 39696: {'lr': 0.00042414580339789065, 'samples': 20324864, 'steps': 39696, 'loss/train': 1.4296166896820068} +03/05/2022 12:07:33 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/05/2022 12:07:37 - INFO - codeparrot_training - Step 39697: {'lr': 0.00042414199589689084, 'samples': 20325376, 'steps': 39697, 'loss/train': 2.0093579292297363} +03/05/2022 12:07:40 - INFO - codeparrot_training - Step 39698: {'lr': 0.0004241381883174254, 'samples': 20325888, 'steps': 39698, 'loss/train': 1.9618418216705322} +03/05/2022 12:07:43 - INFO - codeparrot_training - Step 39699: {'lr': 0.00042413438065949595, 'samples': 20326400, 'steps': 39699, 'loss/train': 2.1491880416870117} +03/05/2022 12:07:44 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/05/2022 12:07:48 - INFO - codeparrot_training - Step 39700: {'lr': 0.0004241305729231042, 'samples': 20326912, 'steps': 39700, 'loss/train': 1.3722018003463745} +03/05/2022 12:07:51 - INFO - codeparrot_training - Step 39701: {'lr': 0.00042412676510825197, 'samples': 20327424, 'steps': 39701, 'loss/train': 2.1167895793914795} +03/05/2022 12:07:53 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/05/2022 12:07:57 - INFO - codeparrot_training - Step 39702: {'lr': 0.00042412295721494086, 'samples': 20327936, 'steps': 39702, 'loss/train': 2.2206218242645264} +03/05/2022 12:08:00 - INFO - codeparrot_training - Step 39703: {'lr': 0.00042411914924317265, 'samples': 20328448, 'steps': 39703, 'loss/train': 1.7333424091339111} +03/05/2022 12:08:01 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/05/2022 12:08:05 - INFO - codeparrot_training - Step 39704: {'lr': 0.00042411534119294903, 'samples': 20328960, 'steps': 39704, 'loss/train': 1.5960036516189575} +03/05/2022 12:08:08 - INFO - codeparrot_training - Step 39705: {'lr': 0.0004241115330642717, 'samples': 20329472, 'steps': 39705, 'loss/train': 1.3677841424942017} +03/05/2022 12:08:13 - INFO - codeparrot_training - Step 39706: {'lr': 0.0004241077248571424, 'samples': 20329984, 'steps': 39706, 'loss/train': 2.119549036026001} +03/05/2022 12:08:17 - INFO - codeparrot_training - Step 39707: {'lr': 0.0004241039165715629, 'samples': 20330496, 'steps': 39707, 'loss/train': 1.2594550848007202} +03/05/2022 12:08:18 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/05/2022 12:08:22 - INFO - codeparrot_training - Step 39708: {'lr': 0.00042410010820753485, 'samples': 20331008, 'steps': 39708, 'loss/train': 1.325139045715332} +03/05/2022 12:08:25 - INFO - codeparrot_training - Step 39709: {'lr': 0.00042409629976505994, 'samples': 20331520, 'steps': 39709, 'loss/train': 1.6221920251846313} +03/05/2022 12:08:27 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/05/2022 12:08:30 - INFO - codeparrot_training - Step 39710: {'lr': 0.00042409249124414, 'samples': 20332032, 'steps': 39710, 'loss/train': 1.640898585319519} +03/05/2022 12:08:34 - INFO - codeparrot_training - Step 39711: {'lr': 0.00042408868264477657, 'samples': 20332544, 'steps': 39711, 'loss/train': 1.203019142150879} +03/05/2022 12:08:36 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 12:08:39 - INFO - codeparrot_training - Step 39712: {'lr': 0.00042408487396697147, 'samples': 20333056, 'steps': 39712, 'loss/train': 1.7528810501098633} +03/05/2022 12:08:42 - INFO - codeparrot_training - Step 39713: {'lr': 0.0004240810652107265, 'samples': 20333568, 'steps': 39713, 'loss/train': 2.0723767280578613} +03/05/2022 12:08:44 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 12:08:47 - INFO - codeparrot_training - Step 39714: {'lr': 0.0004240772563760432, 'samples': 20334080, 'steps': 39714, 'loss/train': 2.3668529987335205} +03/05/2022 12:08:51 - INFO - codeparrot_training - Step 39715: {'lr': 0.00042407344746292345, 'samples': 20334592, 'steps': 39715, 'loss/train': 2.5309340953826904} +03/05/2022 12:08:53 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/05/2022 12:08:56 - INFO - codeparrot_training - Step 39716: {'lr': 0.00042406963847136883, 'samples': 20335104, 'steps': 39716, 'loss/train': 1.3066926002502441} +03/05/2022 12:08:59 - INFO - codeparrot_training - Step 39717: {'lr': 0.0004240658294013812, 'samples': 20335616, 'steps': 39717, 'loss/train': 0.31821590662002563} +03/05/2022 12:09:01 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 12:09:05 - INFO - codeparrot_training - Step 39718: {'lr': 0.00042406202025296213, 'samples': 20336128, 'steps': 39718, 'loss/train': 1.5786982774734497} +03/05/2022 12:09:08 - INFO - codeparrot_training - Step 39719: {'lr': 0.00042405821102611336, 'samples': 20336640, 'steps': 39719, 'loss/train': 1.7996127605438232} +03/05/2022 12:09:10 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/05/2022 12:09:13 - INFO - codeparrot_training - Step 39720: {'lr': 0.0004240544017208367, 'samples': 20337152, 'steps': 39720, 'loss/train': 2.0658388137817383} +03/05/2022 12:09:16 - INFO - codeparrot_training - Step 39721: {'lr': 0.0004240505923371338, 'samples': 20337664, 'steps': 39721, 'loss/train': 0.9575673937797546} +03/05/2022 12:09:18 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 12:09:21 - INFO - codeparrot_training - Step 39722: {'lr': 0.0004240467828750064, 'samples': 20338176, 'steps': 39722, 'loss/train': 1.9698398113250732} +03/05/2022 12:09:25 - INFO - codeparrot_training - Step 39723: {'lr': 0.0004240429733344562, 'samples': 20338688, 'steps': 39723, 'loss/train': 1.0356611013412476} +03/05/2022 12:09:26 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 12:09:30 - INFO - codeparrot_training - Step 39724: {'lr': 0.0004240391637154849, 'samples': 20339200, 'steps': 39724, 'loss/train': 0.6929736137390137} +03/05/2022 12:09:33 - INFO - codeparrot_training - Step 39725: {'lr': 0.0004240353540180942, 'samples': 20339712, 'steps': 39725, 'loss/train': 2.0943732261657715} +03/05/2022 12:09:35 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/05/2022 12:09:38 - INFO - codeparrot_training - Step 39726: {'lr': 0.00042403154424228596, 'samples': 20340224, 'steps': 39726, 'loss/train': 6.650343418121338} +03/05/2022 12:09:41 - INFO - codeparrot_training - Step 39727: {'lr': 0.00042402773438806175, 'samples': 20340736, 'steps': 39727, 'loss/train': 2.104301929473877} +03/05/2022 12:09:43 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/05/2022 12:09:47 - INFO - codeparrot_training - Step 39728: {'lr': 0.00042402392445542333, 'samples': 20341248, 'steps': 39728, 'loss/train': 0.5483896136283875} +03/05/2022 12:09:50 - INFO - codeparrot_training - Step 39729: {'lr': 0.0004240201144443724, 'samples': 20341760, 'steps': 39729, 'loss/train': 1.8378880023956299} +03/05/2022 12:09:51 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 12:09:55 - INFO - codeparrot_training - Step 39730: {'lr': 0.00042401630435491073, 'samples': 20342272, 'steps': 39730, 'loss/train': 1.8550362586975098} +03/05/2022 12:09:58 - INFO - codeparrot_training - Step 39731: {'lr': 0.00042401249418703996, 'samples': 20342784, 'steps': 39731, 'loss/train': 1.6491514444351196} +03/05/2022 12:10:00 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/05/2022 12:10:04 - INFO - codeparrot_training - Step 39732: {'lr': 0.00042400868394076185, 'samples': 20343296, 'steps': 39732, 'loss/train': 1.980665922164917} +03/05/2022 12:10:07 - INFO - codeparrot_training - Step 39733: {'lr': 0.0004240048736160781, 'samples': 20343808, 'steps': 39733, 'loss/train': 2.1470589637756348} +03/05/2022 12:10:09 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/05/2022 12:10:12 - INFO - codeparrot_training - Step 39734: {'lr': 0.0004240010632129905, 'samples': 20344320, 'steps': 39734, 'loss/train': 1.5302097797393799} +03/05/2022 12:10:15 - INFO - codeparrot_training - Step 39735: {'lr': 0.00042399725273150056, 'samples': 20344832, 'steps': 39735, 'loss/train': 1.196291208267212} +03/05/2022 12:10:17 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 12:10:20 - INFO - codeparrot_training - Step 39736: {'lr': 0.0004239934421716103, 'samples': 20345344, 'steps': 39736, 'loss/train': 1.7782800197601318} +03/05/2022 12:10:24 - INFO - codeparrot_training - Step 39737: {'lr': 0.00042398963153332124, 'samples': 20345856, 'steps': 39737, 'loss/train': 1.6377003192901611} +03/05/2022 12:10:25 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/05/2022 12:10:29 - INFO - codeparrot_training - Step 39738: {'lr': 0.00042398582081663513, 'samples': 20346368, 'steps': 39738, 'loss/train': 2.029045343399048} +03/05/2022 12:10:32 - INFO - codeparrot_training - Step 39739: {'lr': 0.0004239820100215537, 'samples': 20346880, 'steps': 39739, 'loss/train': 1.489431619644165} +03/05/2022 12:10:37 - INFO - codeparrot_training - Step 39740: {'lr': 0.00042397819914807855, 'samples': 20347392, 'steps': 39740, 'loss/train': 1.8223440647125244} +03/05/2022 12:10:41 - INFO - codeparrot_training - Step 39741: {'lr': 0.00042397438819621164, 'samples': 20347904, 'steps': 39741, 'loss/train': 2.48219895362854} +03/05/2022 12:10:42 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 12:10:46 - INFO - codeparrot_training - Step 39742: {'lr': 0.0004239705771659545, 'samples': 20348416, 'steps': 39742, 'loss/train': 1.0197758674621582} +03/05/2022 12:10:49 - INFO - codeparrot_training - Step 39743: {'lr': 0.000423966766057309, 'samples': 20348928, 'steps': 39743, 'loss/train': 1.537539005279541} +03/05/2022 12:10:51 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 12:10:55 - INFO - codeparrot_training - Step 39744: {'lr': 0.00042396295487027666, 'samples': 20349440, 'steps': 39744, 'loss/train': 1.895847201347351} +03/05/2022 12:10:58 - INFO - codeparrot_training - Step 39745: {'lr': 0.0004239591436048593, 'samples': 20349952, 'steps': 39745, 'loss/train': 2.3189613819122314} +03/05/2022 12:10:59 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/05/2022 12:11:03 - INFO - codeparrot_training - Step 39746: {'lr': 0.0004239553322610586, 'samples': 20350464, 'steps': 39746, 'loss/train': 1.871346116065979} +03/05/2022 12:11:06 - INFO - codeparrot_training - Step 39747: {'lr': 0.0004239515208388764, 'samples': 20350976, 'steps': 39747, 'loss/train': 0.7931369543075562} +03/05/2022 12:11:08 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/05/2022 12:11:11 - INFO - codeparrot_training - Step 39748: {'lr': 0.00042394770933831425, 'samples': 20351488, 'steps': 39748, 'loss/train': 1.5695595741271973} +03/05/2022 12:11:14 - INFO - codeparrot_training - Step 39749: {'lr': 0.00042394389775937403, 'samples': 20352000, 'steps': 39749, 'loss/train': 1.582062840461731} +03/05/2022 12:11:16 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/05/2022 12:11:20 - INFO - codeparrot_training - Step 39750: {'lr': 0.0004239400861020574, 'samples': 20352512, 'steps': 39750, 'loss/train': 1.7411938905715942} +03/05/2022 12:11:23 - INFO - codeparrot_training - Step 39751: {'lr': 0.00042393627436636597, 'samples': 20353024, 'steps': 39751, 'loss/train': 1.994664192199707} +03/05/2022 12:11:24 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/05/2022 12:11:28 - INFO - codeparrot_training - Step 39752: {'lr': 0.0004239324625523015, 'samples': 20353536, 'steps': 39752, 'loss/train': 1.2105003595352173} +03/05/2022 12:11:31 - INFO - codeparrot_training - Step 39753: {'lr': 0.00042392865065986573, 'samples': 20354048, 'steps': 39753, 'loss/train': 1.6135072708129883} +03/05/2022 12:11:32 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 12:11:36 - INFO - codeparrot_training - Step 39754: {'lr': 0.00042392483868906053, 'samples': 20354560, 'steps': 39754, 'loss/train': 1.0073846578598022} +03/05/2022 12:11:40 - INFO - codeparrot_training - Step 39755: {'lr': 0.0004239210266398874, 'samples': 20355072, 'steps': 39755, 'loss/train': 1.5900074243545532} +03/05/2022 12:11:41 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 12:11:45 - INFO - codeparrot_training - Step 39756: {'lr': 0.0004239172145123481, 'samples': 20355584, 'steps': 39756, 'loss/train': 2.815011501312256} +03/05/2022 12:11:48 - INFO - codeparrot_training - Step 39757: {'lr': 0.0004239134023064445, 'samples': 20356096, 'steps': 39757, 'loss/train': 0.9682260155677795} +03/05/2022 12:11:49 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 12:11:53 - INFO - codeparrot_training - Step 39758: {'lr': 0.0004239095900221781, 'samples': 20356608, 'steps': 39758, 'loss/train': 1.469543218612671} +03/05/2022 12:11:57 - INFO - codeparrot_training - Step 39759: {'lr': 0.00042390577765955077, 'samples': 20357120, 'steps': 39759, 'loss/train': 1.7951879501342773} +03/05/2022 12:11:58 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/05/2022 12:12:02 - INFO - codeparrot_training - Step 39760: {'lr': 0.00042390196521856417, 'samples': 20357632, 'steps': 39760, 'loss/train': 1.8760440349578857} +03/05/2022 12:12:05 - INFO - codeparrot_training - Step 39761: {'lr': 0.00042389815269922005, 'samples': 20358144, 'steps': 39761, 'loss/train': 1.5235909223556519} +03/05/2022 12:12:07 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/05/2022 12:12:10 - INFO - codeparrot_training - Step 39762: {'lr': 0.0004238943401015201, 'samples': 20358656, 'steps': 39762, 'loss/train': 0.9513997435569763} +03/05/2022 12:12:14 - INFO - codeparrot_training - Step 39763: {'lr': 0.0004238905274254661, 'samples': 20359168, 'steps': 39763, 'loss/train': 1.8673384189605713} +03/05/2022 12:12:15 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/05/2022 12:12:19 - INFO - codeparrot_training - Step 39764: {'lr': 0.0004238867146710596, 'samples': 20359680, 'steps': 39764, 'loss/train': 0.9310447573661804} +03/05/2022 12:12:22 - INFO - codeparrot_training - Step 39765: {'lr': 0.0004238829018383025, 'samples': 20360192, 'steps': 39765, 'loss/train': 1.4351699352264404} +03/05/2022 12:12:23 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 12:12:27 - INFO - codeparrot_training - Step 39766: {'lr': 0.0004238790889271964, 'samples': 20360704, 'steps': 39766, 'loss/train': 1.0077195167541504} +03/05/2022 12:12:30 - INFO - codeparrot_training - Step 39767: {'lr': 0.0004238752759377431, 'samples': 20361216, 'steps': 39767, 'loss/train': 1.8727296590805054} +03/05/2022 12:12:32 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/05/2022 12:12:36 - INFO - codeparrot_training - Step 39768: {'lr': 0.0004238714628699443, 'samples': 20361728, 'steps': 39768, 'loss/train': 1.916914701461792} +03/05/2022 12:12:39 - INFO - codeparrot_training - Step 39769: {'lr': 0.00042386764972380164, 'samples': 20362240, 'steps': 39769, 'loss/train': 0.6778926253318787} +03/05/2022 12:12:40 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/05/2022 12:12:44 - INFO - codeparrot_training - Step 39770: {'lr': 0.00042386383649931693, 'samples': 20362752, 'steps': 39770, 'loss/train': 2.377271890640259} +03/05/2022 12:12:47 - INFO - codeparrot_training - Step 39771: {'lr': 0.00042386002319649184, 'samples': 20363264, 'steps': 39771, 'loss/train': 1.6341294050216675} +03/05/2022 12:12:48 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/05/2022 12:12:52 - INFO - codeparrot_training - Step 39772: {'lr': 0.0004238562098153281, 'samples': 20363776, 'steps': 39772, 'loss/train': 1.7641915082931519} +03/05/2022 12:12:56 - INFO - codeparrot_training - Step 39773: {'lr': 0.0004238523963558275, 'samples': 20364288, 'steps': 39773, 'loss/train': 1.123089075088501} +03/05/2022 12:12:57 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 12:13:01 - INFO - codeparrot_training - Step 39774: {'lr': 0.0004238485828179917, 'samples': 20364800, 'steps': 39774, 'loss/train': 1.668440818786621} +03/05/2022 12:13:04 - INFO - codeparrot_training - Step 39775: {'lr': 0.00042384476920182234, 'samples': 20365312, 'steps': 39775, 'loss/train': 1.8428784608840942} +03/05/2022 12:13:05 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/05/2022 12:13:09 - INFO - codeparrot_training - Step 39776: {'lr': 0.0004238409555073212, 'samples': 20365824, 'steps': 39776, 'loss/train': 1.9015308618545532} +03/05/2022 12:13:13 - INFO - codeparrot_training - Step 39777: {'lr': 0.00042383714173449007, 'samples': 20366336, 'steps': 39777, 'loss/train': 2.2158849239349365} +03/05/2022 12:13:14 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/05/2022 12:13:18 - INFO - codeparrot_training - Step 39778: {'lr': 0.00042383332788333055, 'samples': 20366848, 'steps': 39778, 'loss/train': 2.553380250930786} +03/05/2022 12:13:21 - INFO - codeparrot_training - Step 39779: {'lr': 0.0004238295139538445, 'samples': 20367360, 'steps': 39779, 'loss/train': 1.5504097938537598} +03/05/2022 12:13:22 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/05/2022 12:13:26 - INFO - codeparrot_training - Step 39780: {'lr': 0.0004238256999460335, 'samples': 20367872, 'steps': 39780, 'loss/train': 1.0681391954421997} +03/05/2022 12:13:29 - INFO - codeparrot_training - Step 39781: {'lr': 0.00042382188585989933, 'samples': 20368384, 'steps': 39781, 'loss/train': 1.8137515783309937} +03/05/2022 12:13:30 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 12:13:35 - INFO - codeparrot_training - Step 39782: {'lr': 0.0004238180716954436, 'samples': 20368896, 'steps': 39782, 'loss/train': 1.5131499767303467} +03/05/2022 12:13:38 - INFO - codeparrot_training - Step 39783: {'lr': 0.0004238142574526683, 'samples': 20369408, 'steps': 39783, 'loss/train': 1.202422857284546} +03/05/2022 12:13:38 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 12:13:43 - INFO - codeparrot_training - Step 39784: {'lr': 0.0004238104431315749, 'samples': 20369920, 'steps': 39784, 'loss/train': 1.8084338903427124} +03/05/2022 12:13:46 - INFO - codeparrot_training - Step 39785: {'lr': 0.00042380662873216517, 'samples': 20370432, 'steps': 39785, 'loss/train': 2.018679618835449} +03/05/2022 12:13:47 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/05/2022 12:13:51 - INFO - codeparrot_training - Step 39786: {'lr': 0.00042380281425444087, 'samples': 20370944, 'steps': 39786, 'loss/train': 4.231381416320801} +03/05/2022 12:13:55 - INFO - codeparrot_training - Step 39787: {'lr': 0.0004237989996984037, 'samples': 20371456, 'steps': 39787, 'loss/train': 2.237713575363159} +03/05/2022 12:13:55 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/05/2022 12:14:00 - INFO - codeparrot_training - Step 39788: {'lr': 0.0004237951850640555, 'samples': 20371968, 'steps': 39788, 'loss/train': 1.5606929063796997} +03/05/2022 12:14:03 - INFO - codeparrot_training - Step 39789: {'lr': 0.0004237913703513977, 'samples': 20372480, 'steps': 39789, 'loss/train': 1.7312358617782593} +03/05/2022 12:14:03 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/05/2022 12:14:08 - INFO - codeparrot_training - Step 39790: {'lr': 0.00042378755556043225, 'samples': 20372992, 'steps': 39790, 'loss/train': 1.643588900566101} +03/05/2022 12:14:11 - INFO - codeparrot_training - Step 39791: {'lr': 0.0004237837406911608, 'samples': 20373504, 'steps': 39791, 'loss/train': 1.759361743927002} +03/05/2022 12:14:12 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/05/2022 12:14:17 - INFO - codeparrot_training - Step 39792: {'lr': 0.00042377992574358514, 'samples': 20374016, 'steps': 39792, 'loss/train': 1.719692349433899} +03/05/2022 12:14:20 - INFO - codeparrot_training - Step 39793: {'lr': 0.0004237761107177068, 'samples': 20374528, 'steps': 39793, 'loss/train': 2.2635529041290283} +03/05/2022 12:14:20 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/05/2022 12:14:25 - INFO - codeparrot_training - Step 39794: {'lr': 0.00042377229561352774, 'samples': 20375040, 'steps': 39794, 'loss/train': 1.7810338735580444} +03/05/2022 12:14:28 - INFO - codeparrot_training - Step 39795: {'lr': 0.00042376848043104953, 'samples': 20375552, 'steps': 39795, 'loss/train': 1.7315396070480347} +03/05/2022 12:14:29 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 12:14:34 - INFO - codeparrot_training - Step 39796: {'lr': 0.00042376466517027387, 'samples': 20376064, 'steps': 39796, 'loss/train': 1.4263797998428345} +03/05/2022 12:14:37 - INFO - codeparrot_training - Step 39797: {'lr': 0.00042376084983120266, 'samples': 20376576, 'steps': 39797, 'loss/train': 1.4127750396728516} +03/05/2022 12:14:38 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/05/2022 12:14:42 - INFO - codeparrot_training - Step 39798: {'lr': 0.0004237570344138374, 'samples': 20377088, 'steps': 39798, 'loss/train': 0.5726118087768555} +03/05/2022 12:14:45 - INFO - codeparrot_training - Step 39799: {'lr': 0.00042375321891818, 'samples': 20377600, 'steps': 39799, 'loss/train': 1.66351318359375} +03/05/2022 12:14:46 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/05/2022 12:14:51 - INFO - codeparrot_training - Step 39800: {'lr': 0.00042374940334423194, 'samples': 20378112, 'steps': 39800, 'loss/train': 1.4546575546264648} +03/05/2022 12:14:54 - INFO - codeparrot_training - Step 39801: {'lr': 0.00042374558769199517, 'samples': 20378624, 'steps': 39801, 'loss/train': 5.476089000701904} +03/05/2022 12:14:57 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/05/2022 12:15:00 - INFO - codeparrot_training - Step 39802: {'lr': 0.0004237417719614713, 'samples': 20379136, 'steps': 39802, 'loss/train': 1.785767674446106} +03/05/2022 12:15:03 - INFO - codeparrot_training - Step 39803: {'lr': 0.000423737956152662, 'samples': 20379648, 'steps': 39803, 'loss/train': 1.060845971107483} +03/05/2022 12:15:06 - INFO - codeparrot_training - Step 39804: {'lr': 0.0004237341402655692, 'samples': 20380160, 'steps': 39804, 'loss/train': 2.177276372909546} +03/05/2022 12:15:06 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 12:15:11 - INFO - codeparrot_training - Step 39805: {'lr': 0.00042373032430019443, 'samples': 20380672, 'steps': 39805, 'loss/train': 2.085352897644043} +03/05/2022 12:15:14 - INFO - codeparrot_training - Step 39806: {'lr': 0.00042372650825653937, 'samples': 20381184, 'steps': 39806, 'loss/train': 0.6967990398406982} +03/05/2022 12:15:14 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 12:15:20 - INFO - codeparrot_training - Step 39807: {'lr': 0.0004237226921346059, 'samples': 20381696, 'steps': 39807, 'loss/train': 1.8778609037399292} +03/05/2022 12:15:23 - INFO - codeparrot_training - Step 39808: {'lr': 0.0004237188759343956, 'samples': 20382208, 'steps': 39808, 'loss/train': 1.7179851531982422} +03/05/2022 12:15:23 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/05/2022 12:15:28 - INFO - codeparrot_training - Step 39809: {'lr': 0.0004237150596559103, 'samples': 20382720, 'steps': 39809, 'loss/train': 1.1935399770736694} +03/05/2022 12:15:31 - INFO - codeparrot_training - Step 39810: {'lr': 0.00042371124329915167, 'samples': 20383232, 'steps': 39810, 'loss/train': 2.141422748565674} +03/05/2022 12:15:31 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/05/2022 12:15:37 - INFO - codeparrot_training - Step 39811: {'lr': 0.0004237074268641215, 'samples': 20383744, 'steps': 39811, 'loss/train': 1.2997610569000244} +03/05/2022 12:15:40 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/05/2022 12:15:42 - INFO - codeparrot_training - Step 39812: {'lr': 0.00042370361035082136, 'samples': 20384256, 'steps': 39812, 'loss/train': 2.148790121078491} +03/05/2022 12:15:45 - INFO - codeparrot_training - Step 39813: {'lr': 0.000423699793759253, 'samples': 20384768, 'steps': 39813, 'loss/train': 2.8612000942230225} +03/05/2022 12:15:48 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/05/2022 12:15:50 - INFO - codeparrot_training - Step 39814: {'lr': 0.0004236959770894183, 'samples': 20385280, 'steps': 39814, 'loss/train': 1.7612874507904053} +03/05/2022 12:15:54 - INFO - codeparrot_training - Step 39815: {'lr': 0.00042369216034131887, 'samples': 20385792, 'steps': 39815, 'loss/train': 1.492950439453125} +03/05/2022 12:15:57 - INFO - codeparrot_training - Step 39816: {'lr': 0.0004236883435149564, 'samples': 20386304, 'steps': 39816, 'loss/train': 1.4257001876831055} +03/05/2022 12:15:57 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/05/2022 12:16:02 - INFO - codeparrot_training - Step 39817: {'lr': 0.0004236845266103327, 'samples': 20386816, 'steps': 39817, 'loss/train': 2.480581283569336} +03/05/2022 12:16:05 - INFO - codeparrot_training - Step 39818: {'lr': 0.00042368070962744937, 'samples': 20387328, 'steps': 39818, 'loss/train': 1.4131630659103394} +03/05/2022 12:16:05 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/05/2022 12:16:10 - INFO - codeparrot_training - Step 39819: {'lr': 0.0004236768925663082, 'samples': 20387840, 'steps': 39819, 'loss/train': 1.248265266418457} +03/05/2022 12:16:14 - INFO - codeparrot_training - Step 39820: {'lr': 0.0004236730754269109, 'samples': 20388352, 'steps': 39820, 'loss/train': 2.3671958446502686} +03/05/2022 12:16:14 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 12:16:19 - INFO - codeparrot_training - Step 39821: {'lr': 0.00042366925820925915, 'samples': 20388864, 'steps': 39821, 'loss/train': 1.500373125076294} +03/05/2022 12:16:22 - INFO - codeparrot_training - Step 39822: {'lr': 0.0004236654409133548, 'samples': 20389376, 'steps': 39822, 'loss/train': 1.9576431512832642} +03/05/2022 12:16:22 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/05/2022 12:16:27 - INFO - codeparrot_training - Step 39823: {'lr': 0.0004236616235391995, 'samples': 20389888, 'steps': 39823, 'loss/train': 1.5787209272384644} +03/05/2022 12:16:30 - INFO - codeparrot_training - Step 39824: {'lr': 0.0004236578060867949, 'samples': 20390400, 'steps': 39824, 'loss/train': 0.5418258905410767} +03/05/2022 12:16:31 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 12:16:36 - INFO - codeparrot_training - Step 39825: {'lr': 0.0004236539885561427, 'samples': 20390912, 'steps': 39825, 'loss/train': 1.3942497968673706} +03/05/2022 12:16:39 - INFO - codeparrot_training - Step 39826: {'lr': 0.0004236501709472448, 'samples': 20391424, 'steps': 39826, 'loss/train': 1.8778427839279175} +03/05/2022 12:16:39 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/05/2022 12:16:44 - INFO - codeparrot_training - Step 39827: {'lr': 0.00042364635326010277, 'samples': 20391936, 'steps': 39827, 'loss/train': 1.8485225439071655} +03/05/2022 12:16:47 - INFO - codeparrot_training - Step 39828: {'lr': 0.0004236425354947183, 'samples': 20392448, 'steps': 39828, 'loss/train': 1.3074642419815063} +03/05/2022 12:16:47 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 12:16:53 - INFO - codeparrot_training - Step 39829: {'lr': 0.0004236387176510933, 'samples': 20392960, 'steps': 39829, 'loss/train': 1.85964035987854} +03/05/2022 12:16:56 - INFO - codeparrot_training - Step 39830: {'lr': 0.00042363489972922937, 'samples': 20393472, 'steps': 39830, 'loss/train': 1.464102864265442} +03/05/2022 12:16:56 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/05/2022 12:17:01 - INFO - codeparrot_training - Step 39831: {'lr': 0.00042363108172912824, 'samples': 20393984, 'steps': 39831, 'loss/train': 2.4689626693725586} +03/05/2022 12:17:04 - INFO - codeparrot_training - Step 39832: {'lr': 0.0004236272636507915, 'samples': 20394496, 'steps': 39832, 'loss/train': 1.969588041305542} +03/05/2022 12:17:04 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 12:17:10 - INFO - codeparrot_training - Step 39833: {'lr': 0.0004236234454942211, 'samples': 20395008, 'steps': 39833, 'loss/train': 0.7124606370925903} +03/05/2022 12:17:12 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 12:17:15 - INFO - codeparrot_training - Step 39834: {'lr': 0.0004236196272594186, 'samples': 20395520, 'steps': 39834, 'loss/train': 1.5215046405792236} +03/05/2022 12:17:18 - INFO - codeparrot_training - Step 39835: {'lr': 0.00042361580894638586, 'samples': 20396032, 'steps': 39835, 'loss/train': 2.6861088275909424} +03/05/2022 12:17:21 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/05/2022 12:17:23 - INFO - codeparrot_training - Step 39836: {'lr': 0.0004236119905551244, 'samples': 20396544, 'steps': 39836, 'loss/train': 1.7392479181289673} +03/05/2022 12:17:27 - INFO - codeparrot_training - Step 39837: {'lr': 0.0004236081720856362, 'samples': 20397056, 'steps': 39837, 'loss/train': 1.983737826347351} +03/05/2022 12:17:29 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/05/2022 12:17:32 - INFO - codeparrot_training - Step 39838: {'lr': 0.0004236043535379227, 'samples': 20397568, 'steps': 39838, 'loss/train': 1.3157191276550293} +03/05/2022 12:17:35 - INFO - codeparrot_training - Step 39839: {'lr': 0.0004236005349119858, 'samples': 20398080, 'steps': 39839, 'loss/train': 1.9999973773956299} +03/05/2022 12:17:38 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 12:17:40 - INFO - codeparrot_training - Step 39840: {'lr': 0.0004235967162078272, 'samples': 20398592, 'steps': 39840, 'loss/train': 1.8136247396469116} +03/05/2022 12:17:43 - INFO - codeparrot_training - Step 39841: {'lr': 0.0004235928974254486, 'samples': 20399104, 'steps': 39841, 'loss/train': 1.6259191036224365} +03/05/2022 12:17:46 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 12:17:49 - INFO - codeparrot_training - Step 39842: {'lr': 0.00042358907856485166, 'samples': 20399616, 'steps': 39842, 'loss/train': 1.8064451217651367} +03/05/2022 12:17:52 - INFO - codeparrot_training - Step 39843: {'lr': 0.0004235852596260382, 'samples': 20400128, 'steps': 39843, 'loss/train': 0.7688383460044861} +03/05/2022 12:17:54 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 12:17:57 - INFO - codeparrot_training - Step 39844: {'lr': 0.0004235814406090099, 'samples': 20400640, 'steps': 39844, 'loss/train': 1.9100489616394043} +03/05/2022 12:18:00 - INFO - codeparrot_training - Step 39845: {'lr': 0.0004235776215137686, 'samples': 20401152, 'steps': 39845, 'loss/train': 1.6196867227554321} +03/05/2022 12:18:03 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/05/2022 12:18:06 - INFO - codeparrot_training - Step 39846: {'lr': 0.0004235738023403157, 'samples': 20401664, 'steps': 39846, 'loss/train': 2.053760051727295} +03/05/2022 12:18:09 - INFO - codeparrot_training - Step 39847: {'lr': 0.00042356998308865323, 'samples': 20402176, 'steps': 39847, 'loss/train': 1.964150309562683} +03/05/2022 12:18:11 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 12:18:14 - INFO - codeparrot_training - Step 39848: {'lr': 0.00042356616375878274, 'samples': 20402688, 'steps': 39848, 'loss/train': 1.5895428657531738} +03/05/2022 12:18:17 - INFO - codeparrot_training - Step 39849: {'lr': 0.00042356234435070604, 'samples': 20403200, 'steps': 39849, 'loss/train': 1.8729695081710815} +03/05/2022 12:18:19 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/05/2022 12:18:22 - INFO - codeparrot_training - Step 39850: {'lr': 0.0004235585248644249, 'samples': 20403712, 'steps': 39850, 'loss/train': 1.1836236715316772} +03/05/2022 12:18:25 - INFO - codeparrot_training - Step 39851: {'lr': 0.0004235547052999409, 'samples': 20404224, 'steps': 39851, 'loss/train': 1.8481007814407349} +03/05/2022 12:18:28 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/05/2022 12:18:31 - INFO - codeparrot_training - Step 39852: {'lr': 0.00042355088565725584, 'samples': 20404736, 'steps': 39852, 'loss/train': 2.1526317596435547} +03/05/2022 12:18:34 - INFO - codeparrot_training - Step 39853: {'lr': 0.0004235470659363714, 'samples': 20405248, 'steps': 39853, 'loss/train': 1.782456636428833} +03/05/2022 12:18:36 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 12:18:39 - INFO - codeparrot_training - Step 39854: {'lr': 0.0004235432461372894, 'samples': 20405760, 'steps': 39854, 'loss/train': 1.5759862661361694} +03/05/2022 12:18:42 - INFO - codeparrot_training - Step 39855: {'lr': 0.0004235394262600114, 'samples': 20406272, 'steps': 39855, 'loss/train': 2.2514705657958984} +03/05/2022 12:18:45 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 12:18:48 - INFO - codeparrot_training - Step 39856: {'lr': 0.0004235356063045393, 'samples': 20406784, 'steps': 39856, 'loss/train': 1.3339650630950928} +03/05/2022 12:18:51 - INFO - codeparrot_training - Step 39857: {'lr': 0.0004235317862708747, 'samples': 20407296, 'steps': 39857, 'loss/train': 1.9962352514266968} +03/05/2022 12:18:53 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/05/2022 12:18:56 - INFO - codeparrot_training - Step 39858: {'lr': 0.00042352796615901937, 'samples': 20407808, 'steps': 39858, 'loss/train': 2.3052124977111816} +03/05/2022 12:18:59 - INFO - codeparrot_training - Step 39859: {'lr': 0.000423524145968975, 'samples': 20408320, 'steps': 39859, 'loss/train': 1.4809904098510742} +03/05/2022 12:19:01 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 12:19:04 - INFO - codeparrot_training - Step 39860: {'lr': 0.00042352032570074327, 'samples': 20408832, 'steps': 39860, 'loss/train': 1.9197373390197754} +03/05/2022 12:19:08 - INFO - codeparrot_training - Step 39861: {'lr': 0.00042351650535432607, 'samples': 20409344, 'steps': 39861, 'loss/train': 1.754490613937378} +03/05/2022 12:19:10 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/05/2022 12:19:13 - INFO - codeparrot_training - Step 39862: {'lr': 0.00042351268492972494, 'samples': 20409856, 'steps': 39862, 'loss/train': 1.7043520212173462} +03/05/2022 12:19:16 - INFO - codeparrot_training - Step 39863: {'lr': 0.0004235088644269417, 'samples': 20410368, 'steps': 39863, 'loss/train': 1.731917142868042} +03/05/2022 12:19:18 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 12:19:21 - INFO - codeparrot_training - Step 39864: {'lr': 0.00042350504384597803, 'samples': 20410880, 'steps': 39864, 'loss/train': 2.481557846069336} +03/05/2022 12:19:24 - INFO - codeparrot_training - Step 39865: {'lr': 0.0004235012231868357, 'samples': 20411392, 'steps': 39865, 'loss/train': 1.0871659517288208} +03/05/2022 12:19:26 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 12:19:30 - INFO - codeparrot_training - Step 39866: {'lr': 0.0004234974024495163, 'samples': 20411904, 'steps': 39866, 'loss/train': 1.4249085187911987} +03/05/2022 12:19:33 - INFO - codeparrot_training - Step 39867: {'lr': 0.00042349358163402175, 'samples': 20412416, 'steps': 39867, 'loss/train': 1.5193296670913696} +03/05/2022 12:19:34 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 12:19:38 - INFO - codeparrot_training - Step 39868: {'lr': 0.0004234897607403536, 'samples': 20412928, 'steps': 39868, 'loss/train': 1.5914673805236816} +03/05/2022 12:19:41 - INFO - codeparrot_training - Step 39869: {'lr': 0.0004234859397685137, 'samples': 20413440, 'steps': 39869, 'loss/train': 1.0556252002716064} +03/05/2022 12:19:43 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 12:19:46 - INFO - codeparrot_training - Step 39870: {'lr': 0.0004234821187185036, 'samples': 20413952, 'steps': 39870, 'loss/train': 1.638780117034912} +03/05/2022 12:19:50 - INFO - codeparrot_training - Step 39871: {'lr': 0.0004234782975903253, 'samples': 20414464, 'steps': 39871, 'loss/train': 1.2710968255996704} +03/05/2022 12:19:51 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 12:19:55 - INFO - codeparrot_training - Step 39872: {'lr': 0.00042347447638398024, 'samples': 20414976, 'steps': 39872, 'loss/train': 1.6201248168945312} +03/05/2022 12:19:58 - INFO - codeparrot_training - Step 39873: {'lr': 0.00042347065509947023, 'samples': 20415488, 'steps': 39873, 'loss/train': 1.504025936126709} +03/05/2022 12:20:00 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/05/2022 12:20:03 - INFO - codeparrot_training - Step 39874: {'lr': 0.0004234668337367971, 'samples': 20416000, 'steps': 39874, 'loss/train': 2.002636671066284} +03/05/2022 12:20:07 - INFO - codeparrot_training - Step 39875: {'lr': 0.0004234630122959625, 'samples': 20416512, 'steps': 39875, 'loss/train': 1.569145679473877} +03/05/2022 12:20:08 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 12:20:12 - INFO - codeparrot_training - Step 39876: {'lr': 0.0004234591907769681, 'samples': 20417024, 'steps': 39876, 'loss/train': 1.638311505317688} +03/05/2022 12:20:15 - INFO - codeparrot_training - Step 39877: {'lr': 0.0004234553691798156, 'samples': 20417536, 'steps': 39877, 'loss/train': 1.7621586322784424} +03/05/2022 12:20:16 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 12:20:20 - INFO - codeparrot_training - Step 39878: {'lr': 0.000423451547504507, 'samples': 20418048, 'steps': 39878, 'loss/train': 1.1588932275772095} +03/05/2022 12:20:23 - INFO - codeparrot_training - Step 39879: {'lr': 0.0004234477257510436, 'samples': 20418560, 'steps': 39879, 'loss/train': 1.3031576871871948} +03/05/2022 12:20:25 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 12:20:29 - INFO - codeparrot_training - Step 39880: {'lr': 0.00042344390391942745, 'samples': 20419072, 'steps': 39880, 'loss/train': 2.274768590927124} +03/05/2022 12:20:32 - INFO - codeparrot_training - Step 39881: {'lr': 0.0004234400820096601, 'samples': 20419584, 'steps': 39881, 'loss/train': 2.0333662033081055} +03/05/2022 12:20:33 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/05/2022 12:20:37 - INFO - codeparrot_training - Step 39882: {'lr': 0.0004234362600217433, 'samples': 20420096, 'steps': 39882, 'loss/train': 1.5558302402496338} +03/05/2022 12:20:40 - INFO - codeparrot_training - Step 39883: {'lr': 0.0004234324379556789, 'samples': 20420608, 'steps': 39883, 'loss/train': 2.177628517150879} +03/05/2022 12:20:41 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 12:20:46 - INFO - codeparrot_training - Step 39884: {'lr': 0.0004234286158114684, 'samples': 20421120, 'steps': 39884, 'loss/train': 1.6624616384506226} +03/05/2022 12:20:49 - INFO - codeparrot_training - Step 39885: {'lr': 0.0004234247935891137, 'samples': 20421632, 'steps': 39885, 'loss/train': 1.874732255935669} +03/05/2022 12:20:50 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/05/2022 12:20:54 - INFO - codeparrot_training - Step 39886: {'lr': 0.00042342097128861647, 'samples': 20422144, 'steps': 39886, 'loss/train': 1.5031816959381104} +03/05/2022 12:20:57 - INFO - codeparrot_training - Step 39887: {'lr': 0.0004234171489099784, 'samples': 20422656, 'steps': 39887, 'loss/train': 0.5505874156951904} +03/05/2022 12:20:58 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 12:21:02 - INFO - codeparrot_training - Step 39888: {'lr': 0.00042341332645320126, 'samples': 20423168, 'steps': 39888, 'loss/train': 1.8063597679138184} +03/05/2022 12:21:06 - INFO - codeparrot_training - Step 39889: {'lr': 0.0004234095039182867, 'samples': 20423680, 'steps': 39889, 'loss/train': 1.8304696083068848} +03/05/2022 12:21:06 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 12:21:11 - INFO - codeparrot_training - Step 39890: {'lr': 0.00042340568130523653, 'samples': 20424192, 'steps': 39890, 'loss/train': 1.739630103111267} +03/05/2022 12:21:14 - INFO - codeparrot_training - Step 39891: {'lr': 0.0004234018586140525, 'samples': 20424704, 'steps': 39891, 'loss/train': 1.8207424879074097} +03/05/2022 12:21:15 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/05/2022 12:21:19 - INFO - codeparrot_training - Step 39892: {'lr': 0.00042339803584473626, 'samples': 20425216, 'steps': 39892, 'loss/train': 0.5967734456062317} +03/05/2022 12:21:23 - INFO - codeparrot_training - Step 39893: {'lr': 0.0004233942129972894, 'samples': 20425728, 'steps': 39893, 'loss/train': 1.7384788990020752} +03/05/2022 12:21:23 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/05/2022 12:21:28 - INFO - codeparrot_training - Step 39894: {'lr': 0.00042339039007171386, 'samples': 20426240, 'steps': 39894, 'loss/train': 1.5008829832077026} +03/05/2022 12:21:31 - INFO - codeparrot_training - Step 39895: {'lr': 0.00042338656706801135, 'samples': 20426752, 'steps': 39895, 'loss/train': 1.4284151792526245} +03/05/2022 12:21:32 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/05/2022 12:21:36 - INFO - codeparrot_training - Step 39896: {'lr': 0.00042338274398618346, 'samples': 20427264, 'steps': 39896, 'loss/train': 1.5959205627441406} +03/05/2022 12:21:39 - INFO - codeparrot_training - Step 39897: {'lr': 0.000423378920826232, 'samples': 20427776, 'steps': 39897, 'loss/train': 1.7940974235534668} +03/05/2022 12:21:41 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 12:21:45 - INFO - codeparrot_training - Step 39898: {'lr': 0.0004233750975881587, 'samples': 20428288, 'steps': 39898, 'loss/train': 0.6617526412010193} +03/05/2022 12:21:48 - INFO - codeparrot_training - Step 39899: {'lr': 0.0004233712742719652, 'samples': 20428800, 'steps': 39899, 'loss/train': 1.0486679077148438} +03/05/2022 12:21:49 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/05/2022 12:21:53 - INFO - codeparrot_training - Step 39900: {'lr': 0.0004233674508776533, 'samples': 20429312, 'steps': 39900, 'loss/train': 2.1548430919647217} +03/05/2022 12:21:56 - INFO - codeparrot_training - Step 39901: {'lr': 0.00042336362740522473, 'samples': 20429824, 'steps': 39901, 'loss/train': 1.3083045482635498} +03/05/2022 12:21:57 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/05/2022 12:22:02 - INFO - codeparrot_training - Step 39902: {'lr': 0.0004233598038546812, 'samples': 20430336, 'steps': 39902, 'loss/train': 1.673642873764038} +03/05/2022 12:22:05 - INFO - codeparrot_training - Step 39903: {'lr': 0.0004233559802260244, 'samples': 20430848, 'steps': 39903, 'loss/train': 0.718710720539093} +03/05/2022 12:22:06 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/05/2022 12:22:10 - INFO - codeparrot_training - Step 39904: {'lr': 0.000423352156519256, 'samples': 20431360, 'steps': 39904, 'loss/train': 1.0814151763916016} +03/05/2022 12:22:13 - INFO - codeparrot_training - Step 39905: {'lr': 0.0004233483327343779, 'samples': 20431872, 'steps': 39905, 'loss/train': 0.939109206199646} +03/05/2022 12:22:14 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/05/2022 12:22:18 - INFO - codeparrot_training - Step 39906: {'lr': 0.0004233445088713916, 'samples': 20432384, 'steps': 39906, 'loss/train': 1.83853280544281} +03/05/2022 12:22:22 - INFO - codeparrot_training - Step 39907: {'lr': 0.000423340684930299, 'samples': 20432896, 'steps': 39907, 'loss/train': 1.2977674007415771} +03/05/2022 12:22:23 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 12:22:27 - INFO - codeparrot_training - Step 39908: {'lr': 0.0004233368609111018, 'samples': 20433408, 'steps': 39908, 'loss/train': 0.5411314964294434} +03/05/2022 12:22:30 - INFO - codeparrot_training - Step 39909: {'lr': 0.00042333303681380165, 'samples': 20433920, 'steps': 39909, 'loss/train': 1.551157832145691} +03/05/2022 12:22:31 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 12:22:35 - INFO - codeparrot_training - Step 39910: {'lr': 0.0004233292126384003, 'samples': 20434432, 'steps': 39910, 'loss/train': 2.000868797302246} +03/05/2022 12:22:38 - INFO - codeparrot_training - Step 39911: {'lr': 0.00042332538838489955, 'samples': 20434944, 'steps': 39911, 'loss/train': 1.230790615081787} +03/05/2022 12:22:39 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/05/2022 12:22:44 - INFO - codeparrot_training - Step 39912: {'lr': 0.0004233215640533009, 'samples': 20435456, 'steps': 39912, 'loss/train': 1.4630919694900513} +03/05/2022 12:22:47 - INFO - codeparrot_training - Step 39913: {'lr': 0.0004233177396436064, 'samples': 20435968, 'steps': 39913, 'loss/train': 1.5157220363616943} +03/05/2022 12:22:48 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/05/2022 12:22:52 - INFO - codeparrot_training - Step 39914: {'lr': 0.00042331391515581753, 'samples': 20436480, 'steps': 39914, 'loss/train': 2.0832200050354004} +03/05/2022 12:22:55 - INFO - codeparrot_training - Step 39915: {'lr': 0.00042331009058993604, 'samples': 20436992, 'steps': 39915, 'loss/train': 2.32243013381958} +03/05/2022 12:22:56 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 12:23:01 - INFO - codeparrot_training - Step 39916: {'lr': 0.00042330626594596374, 'samples': 20437504, 'steps': 39916, 'loss/train': 1.1027271747589111} +03/05/2022 12:23:04 - INFO - codeparrot_training - Step 39917: {'lr': 0.00042330244122390227, 'samples': 20438016, 'steps': 39917, 'loss/train': 5.9371418952941895} +03/05/2022 12:23:05 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/05/2022 12:23:09 - INFO - codeparrot_training - Step 39918: {'lr': 0.00042329861642375347, 'samples': 20438528, 'steps': 39918, 'loss/train': 1.3348546028137207} +03/05/2022 12:23:12 - INFO - codeparrot_training - Step 39919: {'lr': 0.00042329479154551897, 'samples': 20439040, 'steps': 39919, 'loss/train': 1.5316449403762817} +03/05/2022 12:23:13 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/05/2022 12:23:18 - INFO - codeparrot_training - Step 39920: {'lr': 0.0004232909665892005, 'samples': 20439552, 'steps': 39920, 'loss/train': 1.6895968914031982} +03/05/2022 12:23:21 - INFO - codeparrot_training - Step 39921: {'lr': 0.00042328714155479973, 'samples': 20440064, 'steps': 39921, 'loss/train': 1.8034844398498535} +03/05/2022 12:23:22 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/05/2022 12:23:26 - INFO - codeparrot_training - Step 39922: {'lr': 0.0004232833164423185, 'samples': 20440576, 'steps': 39922, 'loss/train': 1.015723705291748} +03/05/2022 12:23:29 - INFO - codeparrot_training - Step 39923: {'lr': 0.00042327949125175844, 'samples': 20441088, 'steps': 39923, 'loss/train': 0.6590255498886108} +03/05/2022 12:23:30 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 12:23:34 - INFO - codeparrot_training - Step 39924: {'lr': 0.0004232756659831214, 'samples': 20441600, 'steps': 39924, 'loss/train': 1.729242205619812} +03/05/2022 12:23:38 - INFO - codeparrot_training - Step 39925: {'lr': 0.000423271840636409, 'samples': 20442112, 'steps': 39925, 'loss/train': 2.239431142807007} +03/05/2022 12:23:38 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/05/2022 12:23:43 - INFO - codeparrot_training - Step 39926: {'lr': 0.00042326801521162295, 'samples': 20442624, 'steps': 39926, 'loss/train': 1.656354546546936} +03/05/2022 12:23:46 - INFO - codeparrot_training - Step 39927: {'lr': 0.000423264189708765, 'samples': 20443136, 'steps': 39927, 'loss/train': 2.0217373371124268} +03/05/2022 12:23:46 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 12:23:51 - INFO - codeparrot_training - Step 39928: {'lr': 0.0004232603641278369, 'samples': 20443648, 'steps': 39928, 'loss/train': 1.474001407623291} +03/05/2022 12:23:54 - INFO - codeparrot_training - Step 39929: {'lr': 0.00042325653846884037, 'samples': 20444160, 'steps': 39929, 'loss/train': 0.26926377415657043} +03/05/2022 12:23:55 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/05/2022 12:24:00 - INFO - codeparrot_training - Step 39930: {'lr': 0.00042325271273177707, 'samples': 20444672, 'steps': 39930, 'loss/train': 0.8286953568458557} +03/05/2022 12:24:03 - INFO - codeparrot_training - Step 39931: {'lr': 0.0004232488869166488, 'samples': 20445184, 'steps': 39931, 'loss/train': 1.9618321657180786} +03/05/2022 12:24:03 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 12:24:08 - INFO - codeparrot_training - Step 39932: {'lr': 0.0004232450610234573, 'samples': 20445696, 'steps': 39932, 'loss/train': 2.256718873977661} +03/05/2022 12:24:12 - INFO - codeparrot_training - Step 39933: {'lr': 0.00042324123505220414, 'samples': 20446208, 'steps': 39933, 'loss/train': 1.951131820678711} +03/05/2022 12:24:12 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 12:24:17 - INFO - codeparrot_training - Step 39934: {'lr': 0.0004232374090028912, 'samples': 20446720, 'steps': 39934, 'loss/train': 1.7848600149154663} +03/05/2022 12:24:20 - INFO - codeparrot_training - Step 39935: {'lr': 0.00042323358287552017, 'samples': 20447232, 'steps': 39935, 'loss/train': 1.392089605331421} +03/05/2022 12:24:20 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 12:24:25 - INFO - codeparrot_training - Step 39936: {'lr': 0.0004232297566700928, 'samples': 20447744, 'steps': 39936, 'loss/train': 1.9984300136566162} +03/05/2022 12:24:28 - INFO - codeparrot_training - Step 39937: {'lr': 0.00042322593038661074, 'samples': 20448256, 'steps': 39937, 'loss/train': 1.966238021850586} +03/05/2022 12:24:29 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 12:24:34 - INFO - codeparrot_training - Step 39938: {'lr': 0.0004232221040250758, 'samples': 20448768, 'steps': 39938, 'loss/train': 2.114082098007202} +03/05/2022 12:24:37 - INFO - codeparrot_training - Step 39939: {'lr': 0.00042321827758548953, 'samples': 20449280, 'steps': 39939, 'loss/train': 2.5734524726867676} +03/05/2022 12:24:37 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/05/2022 12:24:42 - INFO - codeparrot_training - Step 39940: {'lr': 0.00042321445106785385, 'samples': 20449792, 'steps': 39940, 'loss/train': 1.3465629816055298} +03/05/2022 12:24:45 - INFO - codeparrot_training - Step 39941: {'lr': 0.0004232106244721704, 'samples': 20450304, 'steps': 39941, 'loss/train': 1.9060834646224976} +03/05/2022 12:24:46 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/05/2022 12:24:51 - INFO - codeparrot_training - Step 39942: {'lr': 0.0004232067977984409, 'samples': 20450816, 'steps': 39942, 'loss/train': 2.0072197914123535} +03/05/2022 12:24:54 - INFO - codeparrot_training - Step 39943: {'lr': 0.0004232029710466671, 'samples': 20451328, 'steps': 39943, 'loss/train': 1.4308189153671265} +03/05/2022 12:24:54 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 12:24:59 - INFO - codeparrot_training - Step 39944: {'lr': 0.00042319914421685067, 'samples': 20451840, 'steps': 39944, 'loss/train': 1.8284645080566406} +03/05/2022 12:25:02 - INFO - codeparrot_training - Step 39945: {'lr': 0.0004231953173089935, 'samples': 20452352, 'steps': 39945, 'loss/train': 0.23620784282684326} +03/05/2022 12:25:02 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/05/2022 12:25:08 - INFO - codeparrot_training - Step 39946: {'lr': 0.00042319149032309713, 'samples': 20452864, 'steps': 39946, 'loss/train': 1.7196310758590698} +03/05/2022 12:25:11 - INFO - codeparrot_training - Step 39947: {'lr': 0.00042318766325916336, 'samples': 20453376, 'steps': 39947, 'loss/train': 0.1941543072462082} +03/05/2022 12:25:11 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/05/2022 12:25:16 - INFO - codeparrot_training - Step 39948: {'lr': 0.00042318383611719386, 'samples': 20453888, 'steps': 39948, 'loss/train': 1.1694891452789307} +03/05/2022 12:25:19 - INFO - codeparrot_training - Step 39949: {'lr': 0.00042318000889719044, 'samples': 20454400, 'steps': 39949, 'loss/train': 1.7814993858337402} +03/05/2022 12:25:19 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 12:25:25 - INFO - codeparrot_training - Step 39950: {'lr': 0.0004231761815991547, 'samples': 20454912, 'steps': 39950, 'loss/train': 2.330383062362671} +03/05/2022 12:25:28 - INFO - codeparrot_training - Step 39951: {'lr': 0.0004231723542230885, 'samples': 20455424, 'steps': 39951, 'loss/train': 1.6803061962127686} +03/05/2022 12:25:28 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/05/2022 12:25:33 - INFO - codeparrot_training - Step 39952: {'lr': 0.0004231685267689935, 'samples': 20455936, 'steps': 39952, 'loss/train': 1.5172549486160278} +03/05/2022 12:25:37 - INFO - codeparrot_training - Step 39953: {'lr': 0.0004231646992368715, 'samples': 20456448, 'steps': 39953, 'loss/train': 0.7732665538787842} +03/05/2022 12:25:37 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 12:25:42 - INFO - codeparrot_training - Step 39954: {'lr': 0.00042316087162672415, 'samples': 20456960, 'steps': 39954, 'loss/train': 1.7542901039123535} +03/05/2022 12:25:45 - INFO - codeparrot_training - Step 39955: {'lr': 0.0004231570439385531, 'samples': 20457472, 'steps': 39955, 'loss/train': 1.266332745552063} +03/05/2022 12:25:45 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/05/2022 12:25:50 - INFO - codeparrot_training - Step 39956: {'lr': 0.0004231532161723602, 'samples': 20457984, 'steps': 39956, 'loss/train': 1.6283401250839233} +03/05/2022 12:25:54 - INFO - codeparrot_training - Step 39957: {'lr': 0.0004231493883281471, 'samples': 20458496, 'steps': 39957, 'loss/train': 1.474109411239624} +03/05/2022 12:25:54 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/05/2022 12:25:59 - INFO - codeparrot_training - Step 39958: {'lr': 0.00042314556040591567, 'samples': 20459008, 'steps': 39958, 'loss/train': 1.4236756563186646} +03/05/2022 12:26:02 - INFO - codeparrot_training - Step 39959: {'lr': 0.0004231417324056674, 'samples': 20459520, 'steps': 39959, 'loss/train': 1.503373622894287} +03/05/2022 12:26:03 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/05/2022 12:26:07 - INFO - codeparrot_training - Step 39960: {'lr': 0.00042313790432740416, 'samples': 20460032, 'steps': 39960, 'loss/train': 1.8329119682312012} +03/05/2022 12:26:11 - INFO - codeparrot_training - Step 39961: {'lr': 0.00042313407617112765, 'samples': 20460544, 'steps': 39961, 'loss/train': 1.270346760749817} +03/05/2022 12:26:12 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/05/2022 12:26:16 - INFO - codeparrot_training - Step 39962: {'lr': 0.00042313024793683965, 'samples': 20461056, 'steps': 39962, 'loss/train': 1.2669391632080078} +03/05/2022 12:26:19 - INFO - codeparrot_training - Step 39963: {'lr': 0.0004231264196245418, 'samples': 20461568, 'steps': 39963, 'loss/train': 0.5560345649719238} +03/05/2022 12:26:21 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/05/2022 12:26:24 - INFO - codeparrot_training - Step 39964: {'lr': 0.00042312259123423584, 'samples': 20462080, 'steps': 39964, 'loss/train': 1.206254243850708} +03/05/2022 12:26:27 - INFO - codeparrot_training - Step 39965: {'lr': 0.00042311876276592355, 'samples': 20462592, 'steps': 39965, 'loss/train': 2.0872819423675537} +03/05/2022 12:26:29 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 12:26:33 - INFO - codeparrot_training - Step 39966: {'lr': 0.00042311493421960656, 'samples': 20463104, 'steps': 39966, 'loss/train': 1.771786093711853} +03/05/2022 12:26:36 - INFO - codeparrot_training - Step 39967: {'lr': 0.0004231111055952867, 'samples': 20463616, 'steps': 39967, 'loss/train': 1.4013031721115112} +03/05/2022 12:26:38 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/05/2022 12:26:41 - INFO - codeparrot_training - Step 39968: {'lr': 0.00042310727689296563, 'samples': 20464128, 'steps': 39968, 'loss/train': 1.8335704803466797} +03/05/2022 12:26:44 - INFO - codeparrot_training - Step 39969: {'lr': 0.0004231034481126451, 'samples': 20464640, 'steps': 39969, 'loss/train': 2.144958019256592} +03/05/2022 12:26:46 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 12:26:50 - INFO - codeparrot_training - Step 39970: {'lr': 0.0004230996192543268, 'samples': 20465152, 'steps': 39970, 'loss/train': 2.2452504634857178} +03/05/2022 12:26:53 - INFO - codeparrot_training - Step 39971: {'lr': 0.0004230957903180125, 'samples': 20465664, 'steps': 39971, 'loss/train': 1.6215338706970215} +03/05/2022 12:26:55 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/05/2022 12:26:58 - INFO - codeparrot_training - Step 39972: {'lr': 0.00042309196130370396, 'samples': 20466176, 'steps': 39972, 'loss/train': 2.400557518005371} +03/05/2022 12:27:01 - INFO - codeparrot_training - Step 39973: {'lr': 0.00042308813221140275, 'samples': 20466688, 'steps': 39973, 'loss/train': 1.5071148872375488} +03/05/2022 12:27:04 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 12:27:06 - INFO - codeparrot_training - Step 39974: {'lr': 0.00042308430304111076, 'samples': 20467200, 'steps': 39974, 'loss/train': 1.9544938802719116} +03/05/2022 12:27:10 - INFO - codeparrot_training - Step 39975: {'lr': 0.00042308047379282967, 'samples': 20467712, 'steps': 39975, 'loss/train': 2.3968615531921387} +03/05/2022 12:27:12 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/05/2022 12:27:15 - INFO - codeparrot_training - Step 39976: {'lr': 0.00042307664446656116, 'samples': 20468224, 'steps': 39976, 'loss/train': 1.7354118824005127} +03/05/2022 12:27:18 - INFO - codeparrot_training - Step 39977: {'lr': 0.000423072815062307, 'samples': 20468736, 'steps': 39977, 'loss/train': 1.2915846109390259} +03/05/2022 12:27:20 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/05/2022 12:27:23 - INFO - codeparrot_training - Step 39978: {'lr': 0.0004230689855800689, 'samples': 20469248, 'steps': 39978, 'loss/train': 2.4140164852142334} +03/05/2022 12:27:26 - INFO - codeparrot_training - Step 39979: {'lr': 0.0004230651560198486, 'samples': 20469760, 'steps': 39979, 'loss/train': 2.227933883666992} +03/05/2022 12:27:28 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/05/2022 12:27:32 - INFO - codeparrot_training - Step 39980: {'lr': 0.0004230613263816478, 'samples': 20470272, 'steps': 39980, 'loss/train': 1.7130097150802612} +03/05/2022 12:27:35 - INFO - codeparrot_training - Step 39981: {'lr': 0.0004230574966654682, 'samples': 20470784, 'steps': 39981, 'loss/train': 1.5346124172210693} +03/05/2022 12:27:37 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/05/2022 12:27:40 - INFO - codeparrot_training - Step 39982: {'lr': 0.0004230536668713116, 'samples': 20471296, 'steps': 39982, 'loss/train': 1.1819233894348145} +03/05/2022 12:27:43 - INFO - codeparrot_training - Step 39983: {'lr': 0.00042304983699917965, 'samples': 20471808, 'steps': 39983, 'loss/train': 1.9265443086624146} +03/05/2022 12:27:45 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/05/2022 12:27:49 - INFO - codeparrot_training - Step 39984: {'lr': 0.00042304600704907416, 'samples': 20472320, 'steps': 39984, 'loss/train': 1.5331470966339111} +03/05/2022 12:27:52 - INFO - codeparrot_training - Step 39985: {'lr': 0.0004230421770209968, 'samples': 20472832, 'steps': 39985, 'loss/train': 1.3011888265609741} +03/05/2022 12:27:54 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/05/2022 12:27:57 - INFO - codeparrot_training - Step 39986: {'lr': 0.0004230383469149493, 'samples': 20473344, 'steps': 39986, 'loss/train': 0.3527682423591614} +03/05/2022 12:28:00 - INFO - codeparrot_training - Step 39987: {'lr': 0.0004230345167309334, 'samples': 20473856, 'steps': 39987, 'loss/train': 2.1957151889801025} +03/05/2022 12:28:02 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/05/2022 12:28:06 - INFO - codeparrot_training - Step 39988: {'lr': 0.00042303068646895077, 'samples': 20474368, 'steps': 39988, 'loss/train': 0.8951796889305115} +03/05/2022 12:28:09 - INFO - codeparrot_training - Step 39989: {'lr': 0.0004230268561290032, 'samples': 20474880, 'steps': 39989, 'loss/train': 2.2042088508605957} +03/05/2022 12:28:11 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 12:28:14 - INFO - codeparrot_training - Step 39990: {'lr': 0.0004230230257110924, 'samples': 20475392, 'steps': 39990, 'loss/train': 1.936219334602356} +03/05/2022 12:28:17 - INFO - codeparrot_training - Step 39991: {'lr': 0.00042301919521522014, 'samples': 20475904, 'steps': 39991, 'loss/train': 1.3299697637557983} +03/05/2022 12:28:19 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 12:28:23 - INFO - codeparrot_training - Step 39992: {'lr': 0.0004230153646413881, 'samples': 20476416, 'steps': 39992, 'loss/train': 1.776450753211975} +03/05/2022 12:28:26 - INFO - codeparrot_training - Step 39993: {'lr': 0.000423011533989598, 'samples': 20476928, 'steps': 39993, 'loss/train': 1.0705969333648682} +03/05/2022 12:28:29 - INFO - codeparrot_training - Step 39994: {'lr': 0.0004230077032598515, 'samples': 20477440, 'steps': 39994, 'loss/train': 1.501052737236023} +03/05/2022 12:28:30 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 12:28:34 - INFO - codeparrot_training - Step 39995: {'lr': 0.00042300387245215043, 'samples': 20477952, 'steps': 39995, 'loss/train': 1.7614675760269165} +03/05/2022 12:28:37 - INFO - codeparrot_training - Step 39996: {'lr': 0.00042300004156649654, 'samples': 20478464, 'steps': 39996, 'loss/train': 1.4517847299575806} +03/05/2022 12:28:38 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/05/2022 12:28:43 - INFO - codeparrot_training - Step 39997: {'lr': 0.0004229962106028914, 'samples': 20478976, 'steps': 39997, 'loss/train': 1.92573881149292} +03/05/2022 12:28:46 - INFO - codeparrot_training - Step 39998: {'lr': 0.0004229923795613369, 'samples': 20479488, 'steps': 39998, 'loss/train': 1.906799554824829} +03/05/2022 12:28:46 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 12:28:51 - INFO - codeparrot_training - Step 39999: {'lr': 0.00042298854844183476, 'samples': 20480000, 'steps': 39999, 'loss/train': 1.882041096687317} +03/05/2022 12:28:51 - INFO - codeparrot_training - Evaluating and saving model checkpoint