diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -47585,3 +47585,2507 @@ Use FP16 precision: False 12/28/2021 21:17:48 - INFO - codeparrot_training - Step 44999: {'lr': 1.2326147576179142e-05, 'samples': 23040000, 'steps': 44999, 'batch_loss/train': 0.7561318734660745} 12/28/2021 21:17:48 - INFO - codeparrot_training - Evaluating and saving model checkpoint 12/28/2021 21:21:11 - INFO - codeparrot_training - Step 45000: {'loss/eval': 0.7414663434028625, 'perplexity': 2.099011182785034} +12/28/2021 21:21:30 - WARNING - huggingface_hub.repository - Several commits (16) will be pushed upstream. +12/28/2021 21:21:46 - INFO - codeparrot_training - Step 45000: {'lr': 1.2321259488269537e-05, 'samples': 23040512, 'steps': 45000, 'batch_loss/train': 0.7388205407187343} +12/28/2021 21:21:57 - INFO - codeparrot_training - Step 45001: {'lr': 1.2316372345280824e-05, 'samples': 23041024, 'steps': 45001, 'batch_loss/train': 0.7499255004804581} +12/28/2021 21:22:08 - INFO - codeparrot_training - Step 45002: {'lr': 1.2311486147232487e-05, 'samples': 23041536, 'steps': 45002, 'batch_loss/train': 0.7241759057505988} +12/28/2021 21:22:20 - INFO - codeparrot_training - Step 45003: {'lr': 1.2306600894143954e-05, 'samples': 23042048, 'steps': 45003, 'batch_loss/train': 0.6472440086654387} +12/28/2021 21:22:30 - INFO - codeparrot_training - Step 45004: {'lr': 1.2301716586034573e-05, 'samples': 23042560, 'steps': 45004, 'batch_loss/train': 0.7359280455857515} +12/28/2021 21:22:41 - INFO - codeparrot_training - Step 45005: {'lr': 1.2296833222923853e-05, 'samples': 23043072, 'steps': 45005, 'batch_loss/train': 0.6243610216770321} +12/28/2021 21:22:53 - INFO - codeparrot_training - Step 45006: {'lr': 1.229195080483117e-05, 'samples': 23043584, 'steps': 45006, 'batch_loss/train': 0.6903959596529603} +12/28/2021 21:23:04 - INFO - codeparrot_training - Step 45007: {'lr': 1.2287069331775924e-05, 'samples': 23044096, 'steps': 45007, 'batch_loss/train': 0.7088620727881789} +12/28/2021 21:23:14 - INFO - codeparrot_training - Step 45008: {'lr': 1.2282188803777544e-05, 'samples': 23044608, 'steps': 45008, 'batch_loss/train': 0.6584864500910044} +12/28/2021 21:23:25 - INFO - codeparrot_training - Step 45009: {'lr': 1.2277309220855404e-05, 'samples': 23045120, 'steps': 45009, 'batch_loss/train': 0.7067359153879806} +12/28/2021 21:23:37 - INFO - codeparrot_training - Step 45010: {'lr': 1.2272430583028932e-05, 'samples': 23045632, 'steps': 45010, 'batch_loss/train': 0.7558464175090194} +12/28/2021 21:23:48 - INFO - codeparrot_training - Step 45011: {'lr': 1.2267552890317501e-05, 'samples': 23046144, 'steps': 45011, 'batch_loss/train': 0.6812909061554819} +12/28/2021 21:23:58 - INFO - codeparrot_training - Step 45012: {'lr': 1.2262676142740542e-05, 'samples': 23046656, 'steps': 45012, 'batch_loss/train': 0.7566838106140494} +12/28/2021 21:24:12 - INFO - codeparrot_training - Step 45013: {'lr': 1.2257800340317399e-05, 'samples': 23047168, 'steps': 45013, 'batch_loss/train': 0.6624577877810225} +12/28/2021 21:24:22 - INFO - codeparrot_training - Step 45014: {'lr': 1.22529254830675e-05, 'samples': 23047680, 'steps': 45014, 'batch_loss/train': 0.7585712759755552} +12/28/2021 21:24:33 - INFO - codeparrot_training - Step 45015: {'lr': 1.2248051571010138e-05, 'samples': 23048192, 'steps': 45015, 'batch_loss/train': 0.6877373078023084} +12/28/2021 21:24:45 - INFO - codeparrot_training - Step 45016: {'lr': 1.2243178604164796e-05, 'samples': 23048704, 'steps': 45016, 'batch_loss/train': 0.8030269555747509} +12/28/2021 21:24:56 - INFO - codeparrot_training - Step 45017: {'lr': 1.2238306582550818e-05, 'samples': 23049216, 'steps': 45017, 'batch_loss/train': 0.6700289399595931} +12/28/2021 21:25:06 - INFO - codeparrot_training - Step 45018: {'lr': 1.2233435506187551e-05, 'samples': 23049728, 'steps': 45018, 'batch_loss/train': 0.7592328507453203} +12/28/2021 21:25:17 - INFO - codeparrot_training - Step 45019: {'lr': 1.2228565375094313e-05, 'samples': 23050240, 'steps': 45019, 'batch_loss/train': 0.7161308089271188} +12/28/2021 21:25:31 - INFO - codeparrot_training - Step 45020: {'lr': 1.222369618929059e-05, 'samples': 23050752, 'steps': 45020, 'batch_loss/train': 0.7606823602691293} +12/28/2021 21:25:42 - INFO - codeparrot_training - Step 45021: {'lr': 1.2218827948795641e-05, 'samples': 23051264, 'steps': 45021, 'batch_loss/train': 0.47170426324009895} +12/28/2021 21:25:53 - INFO - codeparrot_training - Step 45022: {'lr': 1.2213960653628814e-05, 'samples': 23051776, 'steps': 45022, 'batch_loss/train': 0.6969932010397315} +12/28/2021 21:26:05 - INFO - codeparrot_training - Step 45023: {'lr': 1.2209094303809564e-05, 'samples': 23052288, 'steps': 45023, 'batch_loss/train': 0.6803211309015751} +12/28/2021 21:26:15 - INFO - codeparrot_training - Step 45024: {'lr': 1.2204228899357128e-05, 'samples': 23052800, 'steps': 45024, 'batch_loss/train': 0.6747496607713401} +12/28/2021 21:26:26 - INFO - codeparrot_training - Step 45025: {'lr': 1.2199364440290877e-05, 'samples': 23053312, 'steps': 45025, 'batch_loss/train': 0.7391664991155267} +12/28/2021 21:26:38 - INFO - codeparrot_training - Step 45026: {'lr': 1.2194500926630187e-05, 'samples': 23053824, 'steps': 45026, 'batch_loss/train': 0.6956963350530714} +12/28/2021 21:26:49 - INFO - codeparrot_training - Step 45027: {'lr': 1.2189638358394373e-05, 'samples': 23054336, 'steps': 45027, 'batch_loss/train': 0.8028994733467698} +12/28/2021 21:26:59 - INFO - codeparrot_training - Step 45028: {'lr': 1.2184776735602754e-05, 'samples': 23054848, 'steps': 45028, 'batch_loss/train': 0.6697774371132255} +12/28/2021 21:27:10 - INFO - codeparrot_training - Step 45029: {'lr': 1.2179916058274676e-05, 'samples': 23055360, 'steps': 45029, 'batch_loss/train': 0.7647643140517175} +12/28/2021 21:27:24 - INFO - codeparrot_training - Step 45030: {'lr': 1.217505632642943e-05, 'samples': 23055872, 'steps': 45030, 'batch_loss/train': 0.6615564168896526} +12/28/2021 21:27:35 - INFO - codeparrot_training - Step 45031: {'lr': 1.2170197540086358e-05, 'samples': 23056384, 'steps': 45031, 'batch_loss/train': 0.7597562554292381} +12/28/2021 21:27:45 - INFO - codeparrot_training - Step 45032: {'lr': 1.2165339699264838e-05, 'samples': 23056896, 'steps': 45032, 'batch_loss/train': 0.6721908585168421} +12/28/2021 21:27:57 - INFO - codeparrot_training - Step 45033: {'lr': 1.2160482803984074e-05, 'samples': 23057408, 'steps': 45033, 'batch_loss/train': 0.7449830938130617} +12/28/2021 21:28:08 - INFO - codeparrot_training - Step 45034: {'lr': 1.2155626854263441e-05, 'samples': 23057920, 'steps': 45034, 'batch_loss/train': 0.7669051168486476} +12/28/2021 21:28:19 - INFO - codeparrot_training - Step 45035: {'lr': 1.2150771850122227e-05, 'samples': 23058432, 'steps': 45035, 'batch_loss/train': 0.6152602593356278} +12/28/2021 21:28:33 - INFO - codeparrot_training - Step 45036: {'lr': 1.2145917791579724e-05, 'samples': 23058944, 'steps': 45036, 'batch_loss/train': 0.748514523729682} +12/28/2021 21:28:43 - INFO - codeparrot_training - Step 45037: {'lr': 1.214106467865525e-05, 'samples': 23059456, 'steps': 45037, 'batch_loss/train': 0.6869725696742535} +12/28/2021 21:28:54 - INFO - codeparrot_training - Step 45038: {'lr': 1.2136212511368094e-05, 'samples': 23059968, 'steps': 45038, 'batch_loss/train': 0.668008507695049} +12/28/2021 21:29:06 - INFO - codeparrot_training - Step 45039: {'lr': 1.2131361289737547e-05, 'samples': 23060480, 'steps': 45039, 'batch_loss/train': 0.7238097100635059} +12/28/2021 21:29:17 - INFO - codeparrot_training - Step 45040: {'lr': 1.2126511013782871e-05, 'samples': 23060992, 'steps': 45040, 'batch_loss/train': 0.6564555326476693} +12/28/2021 21:29:27 - INFO - codeparrot_training - Step 45041: {'lr': 1.2121661683523382e-05, 'samples': 23061504, 'steps': 45041, 'batch_loss/train': 0.7985632829368114} +12/28/2021 21:29:38 - INFO - codeparrot_training - Step 45042: {'lr': 1.2116813298978347e-05, 'samples': 23062016, 'steps': 45042, 'batch_loss/train': 0.7289464806672186} +12/28/2021 21:29:50 - INFO - codeparrot_training - Step 45043: {'lr': 1.2111965860167079e-05, 'samples': 23062528, 'steps': 45043, 'batch_loss/train': 0.6300630713813007} +12/28/2021 21:30:01 - INFO - codeparrot_training - Step 45044: {'lr': 1.210711936710876e-05, 'samples': 23063040, 'steps': 45044, 'batch_loss/train': 0.7316146837547421} +12/28/2021 21:30:11 - INFO - codeparrot_training - Step 45045: {'lr': 1.2102273819822707e-05, 'samples': 23063552, 'steps': 45045, 'batch_loss/train': 0.7828474510461092} +12/28/2021 21:30:24 - INFO - codeparrot_training - Step 45046: {'lr': 1.2097429218328237e-05, 'samples': 23064064, 'steps': 45046, 'batch_loss/train': 0.6406933176331222} +12/28/2021 21:30:34 - INFO - codeparrot_training - Step 45047: {'lr': 1.2092585562644504e-05, 'samples': 23064576, 'steps': 45047, 'batch_loss/train': 0.8760713180527091} +12/28/2021 21:30:45 - INFO - codeparrot_training - Step 45048: {'lr': 1.2087742852790822e-05, 'samples': 23065088, 'steps': 45048, 'batch_loss/train': 0.6311043673194945} +12/28/2021 21:30:57 - INFO - codeparrot_training - Step 45049: {'lr': 1.2082901088786485e-05, 'samples': 23065600, 'steps': 45049, 'batch_loss/train': 0.675655156839639} +12/28/2021 21:31:08 - INFO - codeparrot_training - Step 45050: {'lr': 1.2078060270650699e-05, 'samples': 23066112, 'steps': 45050, 'batch_loss/train': 0.7452592477202415} +12/28/2021 21:31:18 - INFO - codeparrot_training - Step 45051: {'lr': 1.2073220398402668e-05, 'samples': 23066624, 'steps': 45051, 'batch_loss/train': 0.5943583841435611} +12/28/2021 21:31:29 - INFO - codeparrot_training - Step 45052: {'lr': 1.2068381472061713e-05, 'samples': 23067136, 'steps': 45052, 'batch_loss/train': 0.6296164027298801} +12/28/2021 21:31:43 - INFO - codeparrot_training - Step 45053: {'lr': 1.2063543491647011e-05, 'samples': 23067648, 'steps': 45053, 'batch_loss/train': 0.7326388363726437} +12/28/2021 21:31:54 - INFO - codeparrot_training - Step 45054: {'lr': 1.2058706457177826e-05, 'samples': 23068160, 'steps': 45054, 'batch_loss/train': 0.7313567204400897} +12/28/2021 21:32:04 - INFO - codeparrot_training - Step 45055: {'lr': 1.2053870368673392e-05, 'samples': 23068672, 'steps': 45055, 'batch_loss/train': 0.6825571060180664} +12/28/2021 21:32:16 - INFO - codeparrot_training - Step 45056: {'lr': 1.2049035226152888e-05, 'samples': 23069184, 'steps': 45056, 'batch_loss/train': 0.8042012131772935} +12/28/2021 21:32:27 - INFO - codeparrot_training - Step 45057: {'lr': 1.2044201029635604e-05, 'samples': 23069696, 'steps': 45057, 'batch_loss/train': 0.737498982809484} +12/28/2021 21:32:38 - INFO - codeparrot_training - Step 45058: {'lr': 1.2039367779140748e-05, 'samples': 23070208, 'steps': 45058, 'batch_loss/train': 0.7586365723982453} +12/28/2021 21:32:51 - INFO - codeparrot_training - Step 45059: {'lr': 1.2034535474687442e-05, 'samples': 23070720, 'steps': 45059, 'batch_loss/train': 0.8371688146144152} +12/28/2021 21:33:01 - INFO - codeparrot_training - Step 45060: {'lr': 1.202970411629506e-05, 'samples': 23071232, 'steps': 45060, 'batch_loss/train': 1.5028177560307086} +12/28/2021 21:33:12 - INFO - codeparrot_training - Step 45061: {'lr': 1.2024873703982669e-05, 'samples': 23071744, 'steps': 45061, 'batch_loss/train': 0.7540037084836513} +12/28/2021 21:33:22 - INFO - codeparrot_training - Step 45062: {'lr': 1.2020044237769533e-05, 'samples': 23072256, 'steps': 45062, 'batch_loss/train': 0.774959710193798} +12/28/2021 21:33:35 - INFO - codeparrot_training - Step 45063: {'lr': 1.2015215717674832e-05, 'samples': 23072768, 'steps': 45063, 'batch_loss/train': 0.7192422905936837} +12/28/2021 21:33:45 - INFO - codeparrot_training - Step 45064: {'lr': 1.2010388143717798e-05, 'samples': 23073280, 'steps': 45064, 'batch_loss/train': 0.647829485940747} +12/28/2021 21:33:56 - INFO - codeparrot_training - Step 45065: {'lr': 1.2005561515917584e-05, 'samples': 23073792, 'steps': 45065, 'batch_loss/train': 0.6883272877894342} +12/28/2021 21:34:08 - INFO - codeparrot_training - Step 45066: {'lr': 1.2000735834293396e-05, 'samples': 23074304, 'steps': 45066, 'batch_loss/train': 0.7192554154898971} +12/28/2021 21:34:19 - INFO - codeparrot_training - Step 45067: {'lr': 1.1995911098864415e-05, 'samples': 23074816, 'steps': 45067, 'batch_loss/train': 0.6047175964340568} +12/28/2021 21:34:29 - INFO - codeparrot_training - Step 45068: {'lr': 1.1991087309649818e-05, 'samples': 23075328, 'steps': 45068, 'batch_loss/train': 0.7046756665222347} +12/28/2021 21:34:44 - INFO - codeparrot_training - Step 45069: {'lr': 1.1986264466668839e-05, 'samples': 23075840, 'steps': 45069, 'batch_loss/train': 0.740153732476756} +12/28/2021 21:34:55 - INFO - codeparrot_training - Step 45070: {'lr': 1.1981442569940549e-05, 'samples': 23076352, 'steps': 45070, 'batch_loss/train': 0.7268863599747419} +12/28/2021 21:35:06 - INFO - codeparrot_training - Step 45071: {'lr': 1.1976621619484152e-05, 'samples': 23076864, 'steps': 45071, 'batch_loss/train': 0.6751030939631164} +12/28/2021 21:35:18 - INFO - codeparrot_training - Step 45072: {'lr': 1.1971801615318884e-05, 'samples': 23077376, 'steps': 45072, 'batch_loss/train': 0.7915262130554765} +12/28/2021 21:35:29 - INFO - codeparrot_training - Step 45073: {'lr': 1.196698255746384e-05, 'samples': 23077888, 'steps': 45073, 'batch_loss/train': 0.7022106060176156} +12/28/2021 21:35:39 - INFO - codeparrot_training - Step 45074: {'lr': 1.1962164445938173e-05, 'samples': 23078400, 'steps': 45074, 'batch_loss/train': 0.7481046519242227} +12/28/2021 21:35:50 - INFO - codeparrot_training - Step 45075: {'lr': 1.1957347280761088e-05, 'samples': 23078912, 'steps': 45075, 'batch_loss/train': 0.6991078602150083} +12/28/2021 21:36:04 - INFO - codeparrot_training - Step 45076: {'lr': 1.195253106195171e-05, 'samples': 23079424, 'steps': 45076, 'batch_loss/train': 0.7975812293589115} +12/28/2021 21:36:14 - INFO - codeparrot_training - Step 45077: {'lr': 1.194771578952919e-05, 'samples': 23079936, 'steps': 45077, 'batch_loss/train': 0.7183679258450866} +12/28/2021 21:36:25 - INFO - codeparrot_training - Step 45078: {'lr': 1.1942901463512651e-05, 'samples': 23080448, 'steps': 45078, 'batch_loss/train': 0.6687115444801748} +12/28/2021 21:36:37 - INFO - codeparrot_training - Step 45079: {'lr': 1.1938088083921245e-05, 'samples': 23080960, 'steps': 45079, 'batch_loss/train': 0.7518635876476765} +12/28/2021 21:36:48 - INFO - codeparrot_training - Step 45080: {'lr': 1.1933275650774122e-05, 'samples': 23081472, 'steps': 45080, 'batch_loss/train': 0.7558619352057576} +12/28/2021 21:36:58 - INFO - codeparrot_training - Step 45081: {'lr': 1.1928464164090408e-05, 'samples': 23081984, 'steps': 45081, 'batch_loss/train': 0.750448209233582} +12/28/2021 21:37:10 - INFO - codeparrot_training - Step 45082: {'lr': 1.1923653623889197e-05, 'samples': 23082496, 'steps': 45082, 'batch_loss/train': 0.7628919137641788} +12/28/2021 21:37:21 - INFO - codeparrot_training - Step 45083: {'lr': 1.1918844030189668e-05, 'samples': 23083008, 'steps': 45083, 'batch_loss/train': 0.7362244976684451} +12/28/2021 21:37:32 - INFO - codeparrot_training - Step 45084: {'lr': 1.1914035383010945e-05, 'samples': 23083520, 'steps': 45084, 'batch_loss/train': 0.6899639358744025} +12/28/2021 21:37:44 - INFO - codeparrot_training - Step 45085: {'lr': 1.190922768237207e-05, 'samples': 23084032, 'steps': 45085, 'batch_loss/train': 0.7373720174655318} +12/28/2021 21:37:55 - INFO - codeparrot_training - Step 45086: {'lr': 1.190442092829222e-05, 'samples': 23084544, 'steps': 45086, 'batch_loss/train': 0.8320061499252915} +12/28/2021 21:38:05 - INFO - codeparrot_training - Step 45087: {'lr': 1.1899615120790547e-05, 'samples': 23085056, 'steps': 45087, 'batch_loss/train': 0.781310370832216} +12/28/2021 21:38:16 - INFO - codeparrot_training - Step 45088: {'lr': 1.1894810259886063e-05, 'samples': 23085568, 'steps': 45088, 'batch_loss/train': 0.6484068525023758} +12/28/2021 21:38:30 - INFO - codeparrot_training - Step 45089: {'lr': 1.1890006345597892e-05, 'samples': 23086080, 'steps': 45089, 'batch_loss/train': 0.694540711119771} +12/28/2021 21:38:41 - INFO - codeparrot_training - Step 45090: {'lr': 1.1885203377945158e-05, 'samples': 23086592, 'steps': 45090, 'batch_loss/train': 0.4748491674545221} +12/28/2021 21:38:52 - INFO - codeparrot_training - Step 45091: {'lr': 1.188040135694693e-05, 'samples': 23087104, 'steps': 45091, 'batch_loss/train': 0.6762011111713946} +12/28/2021 21:39:04 - INFO - codeparrot_training - Step 45092: {'lr': 1.1875600282622328e-05, 'samples': 23087616, 'steps': 45092, 'batch_loss/train': 0.8015128504484892} +12/28/2021 21:39:14 - INFO - codeparrot_training - Step 45093: {'lr': 1.1870800154990453e-05, 'samples': 23088128, 'steps': 45093, 'batch_loss/train': 0.7680754195898771} +12/28/2021 21:39:25 - INFO - codeparrot_training - Step 45094: {'lr': 1.186600097407034e-05, 'samples': 23088640, 'steps': 45094, 'batch_loss/train': 0.6349109052098356} +12/28/2021 21:39:37 - INFO - codeparrot_training - Step 45095: {'lr': 1.1861202739881116e-05, 'samples': 23089152, 'steps': 45095, 'batch_loss/train': 0.720810909755528} +12/28/2021 21:39:48 - INFO - codeparrot_training - Step 45096: {'lr': 1.1856405452441765e-05, 'samples': 23089664, 'steps': 45096, 'batch_loss/train': 0.8004782989737578} +12/28/2021 21:39:58 - INFO - codeparrot_training - Step 45097: {'lr': 1.1851609111771466e-05, 'samples': 23090176, 'steps': 45097, 'batch_loss/train': 0.6310273174894974} +12/28/2021 21:40:09 - INFO - codeparrot_training - Step 45098: {'lr': 1.1846813717889288e-05, 'samples': 23090688, 'steps': 45098, 'batch_loss/train': 0.7178476499393582} +12/28/2021 21:40:23 - INFO - codeparrot_training - Step 45099: {'lr': 1.1842019270814214e-05, 'samples': 23091200, 'steps': 45099, 'batch_loss/train': 0.6204731618054211} +12/28/2021 21:40:33 - INFO - codeparrot_training - Step 45100: {'lr': 1.1837225770565312e-05, 'samples': 23091712, 'steps': 45100, 'batch_loss/train': 0.6634516839403659} +12/28/2021 21:40:44 - INFO - codeparrot_training - Step 45101: {'lr': 1.1832433217161764e-05, 'samples': 23092224, 'steps': 45101, 'batch_loss/train': 0.6729153123451397} +12/28/2021 21:40:56 - INFO - codeparrot_training - Step 45102: {'lr': 1.182764161062247e-05, 'samples': 23092736, 'steps': 45102, 'batch_loss/train': 0.6145767402485944} +12/28/2021 21:41:07 - INFO - codeparrot_training - Step 45103: {'lr': 1.1822850950966551e-05, 'samples': 23093248, 'steps': 45103, 'batch_loss/train': 0.7177111972123384} +12/28/2021 21:41:17 - INFO - codeparrot_training - Step 45104: {'lr': 1.1818061238213052e-05, 'samples': 23093760, 'steps': 45104, 'batch_loss/train': 0.7314780526794493} +12/28/2021 21:41:31 - INFO - codeparrot_training - Step 45105: {'lr': 1.1813272472381009e-05, 'samples': 23094272, 'steps': 45105, 'batch_loss/train': 0.7638329109176993} +12/28/2021 21:41:42 - INFO - codeparrot_training - Step 45106: {'lr': 1.1808484653489437e-05, 'samples': 23094784, 'steps': 45106, 'batch_loss/train': 0.6526569481939077} +12/28/2021 21:41:53 - INFO - codeparrot_training - Step 45107: {'lr': 1.180369778155746e-05, 'samples': 23095296, 'steps': 45107, 'batch_loss/train': 0.7360106627456844} +12/28/2021 21:42:05 - INFO - codeparrot_training - Step 45108: {'lr': 1.179891185660395e-05, 'samples': 23095808, 'steps': 45108, 'batch_loss/train': 0.6459036413580179} +12/28/2021 21:42:15 - INFO - codeparrot_training - Step 45109: {'lr': 1.179412687864806e-05, 'samples': 23096320, 'steps': 45109, 'batch_loss/train': 0.7211948551703244} +12/28/2021 21:42:26 - INFO - codeparrot_training - Step 45110: {'lr': 1.1789342847708828e-05, 'samples': 23096832, 'steps': 45110, 'batch_loss/train': 0.6632023984566331} +12/28/2021 21:42:37 - INFO - codeparrot_training - Step 45111: {'lr': 1.178455976380513e-05, 'samples': 23097344, 'steps': 45111, 'batch_loss/train': 0.6374338078312576} +12/28/2021 21:42:49 - INFO - codeparrot_training - Step 45112: {'lr': 1.1779777626956117e-05, 'samples': 23097856, 'steps': 45112, 'batch_loss/train': 0.7772199315950274} +12/28/2021 21:43:00 - INFO - codeparrot_training - Step 45113: {'lr': 1.1774996437180801e-05, 'samples': 23098368, 'steps': 45113, 'batch_loss/train': 0.8973648566752672} +12/28/2021 21:43:10 - INFO - codeparrot_training - Step 45114: {'lr': 1.1770216194498113e-05, 'samples': 23098880, 'steps': 45114, 'batch_loss/train': 0.7833612665999681} +12/28/2021 21:43:24 - INFO - codeparrot_training - Step 45115: {'lr': 1.1765436898927118e-05, 'samples': 23099392, 'steps': 45115, 'batch_loss/train': 0.6017526917858049} +12/28/2021 21:43:35 - INFO - codeparrot_training - Step 45116: {'lr': 1.1760658550486747e-05, 'samples': 23099904, 'steps': 45116, 'batch_loss/train': 0.7072297921404243} +12/28/2021 21:43:46 - INFO - codeparrot_training - Step 45117: {'lr': 1.1755881149196068e-05, 'samples': 23100416, 'steps': 45117, 'batch_loss/train': 0.6999191669747233} +12/28/2021 21:43:58 - INFO - codeparrot_training - Step 45118: {'lr': 1.1751104695074038e-05, 'samples': 23100928, 'steps': 45118, 'batch_loss/train': 0.7282848000177182} +12/28/2021 21:44:09 - INFO - codeparrot_training - Step 45119: {'lr': 1.174632918813967e-05, 'samples': 23101440, 'steps': 45119, 'batch_loss/train': 0.5183316124603152} +12/28/2021 21:44:19 - INFO - codeparrot_training - Step 45120: {'lr': 1.1741554628411921e-05, 'samples': 23101952, 'steps': 45120, 'batch_loss/train': 0.7663863329216838} +12/28/2021 21:44:30 - INFO - codeparrot_training - Step 45121: {'lr': 1.1736781015909803e-05, 'samples': 23102464, 'steps': 45121, 'batch_loss/train': 0.6768820327706635} +12/28/2021 21:44:42 - INFO - codeparrot_training - Step 45122: {'lr': 1.1732008350652273e-05, 'samples': 23102976, 'steps': 45122, 'batch_loss/train': 0.6983950262656435} +12/28/2021 21:44:53 - INFO - codeparrot_training - Step 45123: {'lr': 1.1727236632658317e-05, 'samples': 23103488, 'steps': 45123, 'batch_loss/train': 0.8431602651253343} +12/28/2021 21:45:03 - INFO - codeparrot_training - Step 45124: {'lr': 1.1722465861946918e-05, 'samples': 23104000, 'steps': 45124, 'batch_loss/train': 1.1258037968655117} +12/28/2021 21:45:17 - INFO - codeparrot_training - Step 45125: {'lr': 1.1717696038537006e-05, 'samples': 23104512, 'steps': 45125, 'batch_loss/train': 0.7656624419614673} +12/28/2021 21:45:28 - INFO - codeparrot_training - Step 45126: {'lr': 1.1712927162447512e-05, 'samples': 23105024, 'steps': 45126, 'batch_loss/train': 0.8196420883759856} +12/28/2021 21:45:38 - INFO - codeparrot_training - Step 45127: {'lr': 1.1708159233697529e-05, 'samples': 23105536, 'steps': 45127, 'batch_loss/train': 0.6996498496737331} +12/28/2021 21:45:51 - INFO - codeparrot_training - Step 45128: {'lr': 1.1703392252305906e-05, 'samples': 23106048, 'steps': 45128, 'batch_loss/train': 0.6960059977136552} +12/28/2021 21:46:01 - INFO - codeparrot_training - Step 45129: {'lr': 1.1698626218291596e-05, 'samples': 23106560, 'steps': 45129, 'batch_loss/train': 0.7600269811227918} +12/28/2021 21:46:12 - INFO - codeparrot_training - Step 45130: {'lr': 1.169386113167356e-05, 'samples': 23107072, 'steps': 45130, 'batch_loss/train': 0.7185526052489877} +12/28/2021 21:46:22 - INFO - codeparrot_training - Step 45131: {'lr': 1.1689096992470783e-05, 'samples': 23107584, 'steps': 45131, 'batch_loss/train': 0.6830867105163634} +12/28/2021 21:46:35 - INFO - codeparrot_training - Step 45132: {'lr': 1.1684333800702135e-05, 'samples': 23108096, 'steps': 45132, 'batch_loss/train': 0.6919168028980494} +12/28/2021 21:46:45 - INFO - codeparrot_training - Step 45133: {'lr': 1.167957155638663e-05, 'samples': 23108608, 'steps': 45133, 'batch_loss/train': 0.6780005036853254} +12/28/2021 21:46:56 - INFO - codeparrot_training - Step 45134: {'lr': 1.1674810259543144e-05, 'samples': 23109120, 'steps': 45134, 'batch_loss/train': 0.5672831028932706} +12/28/2021 21:47:10 - INFO - codeparrot_training - Step 45135: {'lr': 1.1670049910190633e-05, 'samples': 23109632, 'steps': 45135, 'batch_loss/train': 0.5928733334876597} +12/28/2021 21:47:21 - INFO - codeparrot_training - Step 45136: {'lr': 1.1665290508348025e-05, 'samples': 23110144, 'steps': 45136, 'batch_loss/train': 0.7247302439063787} +12/28/2021 21:47:31 - INFO - codeparrot_training - Step 45137: {'lr': 1.1660532054034196e-05, 'samples': 23110656, 'steps': 45137, 'batch_loss/train': 0.6199493641033769} +12/28/2021 21:47:43 - INFO - codeparrot_training - Step 45138: {'lr': 1.1655774547268099e-05, 'samples': 23111168, 'steps': 45138, 'batch_loss/train': 0.6074463874101639} +12/28/2021 21:47:54 - INFO - codeparrot_training - Step 45139: {'lr': 1.1651017988068696e-05, 'samples': 23111680, 'steps': 45139, 'batch_loss/train': 0.7497723158448935} +12/28/2021 21:48:05 - INFO - codeparrot_training - Step 45140: {'lr': 1.164626237645483e-05, 'samples': 23112192, 'steps': 45140, 'batch_loss/train': 0.7725201440043747} +12/28/2021 21:48:15 - INFO - codeparrot_training - Step 45141: {'lr': 1.1641507712445403e-05, 'samples': 23112704, 'steps': 45141, 'batch_loss/train': 0.5392035723198205} +12/28/2021 21:48:27 - INFO - codeparrot_training - Step 45142: {'lr': 1.1636753996059346e-05, 'samples': 23113216, 'steps': 45142, 'batch_loss/train': 0.6276215150719509} +12/28/2021 21:48:38 - INFO - codeparrot_training - Step 45143: {'lr': 1.1632001227315558e-05, 'samples': 23113728, 'steps': 45143, 'batch_loss/train': 0.677175990305841} +12/28/2021 21:48:49 - INFO - codeparrot_training - Step 45144: {'lr': 1.1627249406232916e-05, 'samples': 23114240, 'steps': 45144, 'batch_loss/train': 0.779763444326818} +12/28/2021 21:49:03 - INFO - codeparrot_training - Step 45145: {'lr': 1.1622498532830345e-05, 'samples': 23114752, 'steps': 45145, 'batch_loss/train': 0.6233625188469887} +12/28/2021 21:49:13 - INFO - codeparrot_training - Step 45146: {'lr': 1.1617748607126693e-05, 'samples': 23115264, 'steps': 45146, 'batch_loss/train': 0.6595634799450636} +12/28/2021 21:49:24 - INFO - codeparrot_training - Step 45147: {'lr': 1.1612999629140863e-05, 'samples': 23115776, 'steps': 45147, 'batch_loss/train': 0.7605407368391752} +12/28/2021 21:49:36 - INFO - codeparrot_training - Step 45148: {'lr': 1.1608251598891757e-05, 'samples': 23116288, 'steps': 45148, 'batch_loss/train': 0.723977105692029} +12/28/2021 21:49:47 - INFO - codeparrot_training - Step 45149: {'lr': 1.1603504516398216e-05, 'samples': 23116800, 'steps': 45149, 'batch_loss/train': 0.9458010466769338} +12/28/2021 21:49:58 - INFO - codeparrot_training - Step 45150: {'lr': 1.1598758381679148e-05, 'samples': 23117312, 'steps': 45150, 'batch_loss/train': 0.6989142610691488} +12/28/2021 21:50:08 - INFO - codeparrot_training - Step 45151: {'lr': 1.1594013194753366e-05, 'samples': 23117824, 'steps': 45151, 'batch_loss/train': 0.776330485008657} +12/28/2021 21:50:20 - INFO - codeparrot_training - Step 45152: {'lr': 1.1589268955639747e-05, 'samples': 23118336, 'steps': 45152, 'batch_loss/train': 0.741174004971981} +12/28/2021 21:50:31 - INFO - codeparrot_training - Step 45153: {'lr': 1.1584525664357249e-05, 'samples': 23118848, 'steps': 45153, 'batch_loss/train': 0.8118411167524755} +12/28/2021 21:50:42 - INFO - codeparrot_training - Step 45154: {'lr': 1.157978332092463e-05, 'samples': 23119360, 'steps': 45154, 'batch_loss/train': 0.710104389116168} +12/28/2021 21:50:54 - INFO - codeparrot_training - Step 45155: {'lr': 1.157504192536074e-05, 'samples': 23119872, 'steps': 45155, 'batch_loss/train': 0.7094334170687944} +12/28/2021 21:51:05 - INFO - codeparrot_training - Step 45156: {'lr': 1.157030147768448e-05, 'samples': 23120384, 'steps': 45156, 'batch_loss/train': 0.7496247170493007} +12/28/2021 21:51:15 - INFO - codeparrot_training - Step 45157: {'lr': 1.156556197791464e-05, 'samples': 23120896, 'steps': 45157, 'batch_loss/train': 0.7458471427671611} +12/28/2021 21:51:29 - INFO - codeparrot_training - Step 45158: {'lr': 1.1560823426070121e-05, 'samples': 23121408, 'steps': 45158, 'batch_loss/train': 0.6419788913335651} +12/28/2021 21:51:40 - INFO - codeparrot_training - Step 45159: {'lr': 1.1556085822169744e-05, 'samples': 23121920, 'steps': 45159, 'batch_loss/train': 0.7882645162753761} +12/28/2021 21:51:51 - INFO - codeparrot_training - Step 45160: {'lr': 1.1551349166232323e-05, 'samples': 23122432, 'steps': 45160, 'batch_loss/train': 0.7543531587580219} +12/28/2021 21:52:01 - INFO - codeparrot_training - Step 45161: {'lr': 1.1546613458276706e-05, 'samples': 23122944, 'steps': 45161, 'batch_loss/train': 0.6978685674257576} +12/28/2021 21:52:14 - INFO - codeparrot_training - Step 45162: {'lr': 1.1541878698321767e-05, 'samples': 23123456, 'steps': 45162, 'batch_loss/train': 0.6429912850726396} +12/28/2021 21:52:24 - INFO - codeparrot_training - Step 45163: {'lr': 1.1537144886386186e-05, 'samples': 23123968, 'steps': 45163, 'batch_loss/train': 0.8312504030764103} +12/28/2021 21:52:35 - INFO - codeparrot_training - Step 45164: {'lr': 1.1532412022488919e-05, 'samples': 23124480, 'steps': 45164, 'batch_loss/train': 0.7599345003254712} +12/28/2021 21:52:49 - INFO - codeparrot_training - Step 45165: {'lr': 1.1527680106648785e-05, 'samples': 23124992, 'steps': 45165, 'batch_loss/train': 0.5350348016363569} +12/28/2021 21:52:59 - INFO - codeparrot_training - Step 45166: {'lr': 1.152294913888452e-05, 'samples': 23125504, 'steps': 45166, 'batch_loss/train': 0.6943556689657271} +12/28/2021 21:53:10 - INFO - codeparrot_training - Step 45167: {'lr': 1.1518219119214911e-05, 'samples': 23126016, 'steps': 45167, 'batch_loss/train': 0.7792547666467726} +12/28/2021 21:53:22 - INFO - codeparrot_training - Step 45168: {'lr': 1.1513490047658892e-05, 'samples': 23126528, 'steps': 45168, 'batch_loss/train': 0.6440566085802857} +12/28/2021 21:53:33 - INFO - codeparrot_training - Step 45169: {'lr': 1.1508761924235168e-05, 'samples': 23127040, 'steps': 45169, 'batch_loss/train': 0.6070592103642412} +12/28/2021 21:53:44 - INFO - codeparrot_training - Step 45170: {'lr': 1.1504034748962527e-05, 'samples': 23127552, 'steps': 45170, 'batch_loss/train': 0.7517749206162989} +12/28/2021 21:53:54 - INFO - codeparrot_training - Step 45171: {'lr': 1.149930852185982e-05, 'samples': 23128064, 'steps': 45171, 'batch_loss/train': 0.9618778033182025} +12/28/2021 21:54:06 - INFO - codeparrot_training - Step 45172: {'lr': 1.1494583242945777e-05, 'samples': 23128576, 'steps': 45172, 'batch_loss/train': 0.7103884099051356} +12/28/2021 21:54:17 - INFO - codeparrot_training - Step 45173: {'lr': 1.1489858912239248e-05, 'samples': 23129088, 'steps': 45173, 'batch_loss/train': 0.6807679194025695} +12/28/2021 21:54:28 - INFO - codeparrot_training - Step 45174: {'lr': 1.1485135529758966e-05, 'samples': 23129600, 'steps': 45174, 'batch_loss/train': 0.6988634196459316} +12/28/2021 21:54:42 - INFO - codeparrot_training - Step 45175: {'lr': 1.1480413095523722e-05, 'samples': 23130112, 'steps': 45175, 'batch_loss/train': 0.7225189578020945} +12/28/2021 21:54:53 - INFO - codeparrot_training - Step 45176: {'lr': 1.147569160955228e-05, 'samples': 23130624, 'steps': 45176, 'batch_loss/train': 0.602459106448805} +12/28/2021 21:55:03 - INFO - codeparrot_training - Step 45177: {'lr': 1.1470971071863485e-05, 'samples': 23131136, 'steps': 45177, 'batch_loss/train': 0.8836542330682278} +12/28/2021 21:55:15 - INFO - codeparrot_training - Step 45178: {'lr': 1.1466251482475959e-05, 'samples': 23131648, 'steps': 45178, 'batch_loss/train': 0.7286112722940743} +12/28/2021 21:55:26 - INFO - codeparrot_training - Step 45179: {'lr': 1.1461532841408634e-05, 'samples': 23132160, 'steps': 45179, 'batch_loss/train': 0.6215958904940635} +12/28/2021 21:55:36 - INFO - codeparrot_training - Step 45180: {'lr': 1.1456815148680133e-05, 'samples': 23132672, 'steps': 45180, 'batch_loss/train': 0.6652057596365921} +12/28/2021 21:55:47 - INFO - codeparrot_training - Step 45181: {'lr': 1.1452098404309303e-05, 'samples': 23133184, 'steps': 45181, 'batch_loss/train': 0.6901094354689121} +12/28/2021 21:55:59 - INFO - codeparrot_training - Step 45182: {'lr': 1.1447382608314821e-05, 'samples': 23133696, 'steps': 45182, 'batch_loss/train': 0.7181781842373312} +12/28/2021 21:56:10 - INFO - codeparrot_training - Step 45183: {'lr': 1.144266776071548e-05, 'samples': 23134208, 'steps': 45183, 'batch_loss/train': 0.6281738271936774} +12/28/2021 21:56:21 - INFO - codeparrot_training - Step 45184: {'lr': 1.1437953861530015e-05, 'samples': 23134720, 'steps': 45184, 'batch_loss/train': 0.7714589834213257} +12/28/2021 21:56:33 - INFO - codeparrot_training - Step 45185: {'lr': 1.1433240910777159e-05, 'samples': 23135232, 'steps': 45185, 'batch_loss/train': 0.7533157514408231} +12/28/2021 21:56:44 - INFO - codeparrot_training - Step 45186: {'lr': 1.1428528908475678e-05, 'samples': 23135744, 'steps': 45186, 'batch_loss/train': 0.6851532143191434} +12/28/2021 21:56:55 - INFO - codeparrot_training - Step 45187: {'lr': 1.1423817854644276e-05, 'samples': 23136256, 'steps': 45187, 'batch_loss/train': 0.7251094508683309} +12/28/2021 21:57:07 - INFO - codeparrot_training - Step 45188: {'lr': 1.1419107749301716e-05, 'samples': 23136768, 'steps': 45188, 'batch_loss/train': 0.7406473859446123} +12/28/2021 21:57:18 - INFO - codeparrot_training - Step 45189: {'lr': 1.1414398592466651e-05, 'samples': 23137280, 'steps': 45189, 'batch_loss/train': 0.6781553609762341} +12/28/2021 21:57:28 - INFO - codeparrot_training - Step 45190: {'lr': 1.1409690384157873e-05, 'samples': 23137792, 'steps': 45190, 'batch_loss/train': 0.7917155711911619} +12/28/2021 21:57:39 - INFO - codeparrot_training - Step 45191: {'lr': 1.1404983124394114e-05, 'samples': 23138304, 'steps': 45191, 'batch_loss/train': 0.695106367697008} +12/28/2021 21:57:51 - INFO - codeparrot_training - Step 45192: {'lr': 1.1400276813194028e-05, 'samples': 23138816, 'steps': 45192, 'batch_loss/train': 0.7419019603985362} +12/28/2021 21:58:02 - INFO - codeparrot_training - Step 45193: {'lr': 1.1395571450576292e-05, 'samples': 23139328, 'steps': 45193, 'batch_loss/train': 0.482095543819014} +12/28/2021 21:58:13 - INFO - codeparrot_training - Step 45194: {'lr': 1.1390867036559754e-05, 'samples': 23139840, 'steps': 45194, 'batch_loss/train': 0.6520025851204991} +12/28/2021 21:58:25 - INFO - codeparrot_training - Step 45195: {'lr': 1.1386163571162983e-05, 'samples': 23140352, 'steps': 45195, 'batch_loss/train': 0.6111841504462063} +12/28/2021 21:58:35 - INFO - codeparrot_training - Step 45196: {'lr': 1.1381461054404768e-05, 'samples': 23140864, 'steps': 45196, 'batch_loss/train': 0.5653514165896922} +12/28/2021 21:58:46 - INFO - codeparrot_training - Step 45197: {'lr': 1.1376759486303733e-05, 'samples': 23141376, 'steps': 45197, 'batch_loss/train': 0.6556123597547412} +12/28/2021 21:59:00 - INFO - codeparrot_training - Step 45198: {'lr': 1.1372058866878588e-05, 'samples': 23141888, 'steps': 45198, 'batch_loss/train': 0.7248354586772621} +12/28/2021 21:59:11 - INFO - codeparrot_training - Step 45199: {'lr': 1.1367359196148063e-05, 'samples': 23142400, 'steps': 45199, 'batch_loss/train': 0.7011120608076453} +12/28/2021 21:59:21 - INFO - codeparrot_training - Step 45200: {'lr': 1.1362660474130815e-05, 'samples': 23142912, 'steps': 45200, 'batch_loss/train': 0.7146652820520103} +12/28/2021 21:59:33 - INFO - codeparrot_training - Step 45201: {'lr': 1.135796270084552e-05, 'samples': 23143424, 'steps': 45201, 'batch_loss/train': 0.6802255888469517} +12/28/2021 21:59:44 - INFO - codeparrot_training - Step 45202: {'lr': 1.135326587631083e-05, 'samples': 23143936, 'steps': 45202, 'batch_loss/train': 0.8390423152595758} +12/28/2021 21:59:55 - INFO - codeparrot_training - Step 45203: {'lr': 1.1348570000545511e-05, 'samples': 23144448, 'steps': 45203, 'batch_loss/train': 0.6800969741307199} +12/28/2021 22:00:05 - INFO - codeparrot_training - Step 45204: {'lr': 1.1343875073568072e-05, 'samples': 23144960, 'steps': 45204, 'batch_loss/train': 0.6721721105277538} +12/28/2021 22:00:23 - INFO - codeparrot_training - Step 45205: {'lr': 1.1339181095397361e-05, 'samples': 23145472, 'steps': 45205, 'batch_loss/train': 0.718182299984619} +12/28/2021 22:00:34 - INFO - codeparrot_training - Step 45206: {'lr': 1.1334488066051917e-05, 'samples': 23145984, 'steps': 45206, 'batch_loss/train': 0.7300790552981198} +12/28/2021 22:00:44 - INFO - codeparrot_training - Step 45207: {'lr': 1.1329795985550396e-05, 'samples': 23146496, 'steps': 45207, 'batch_loss/train': 0.6861814074218273} +12/28/2021 22:00:57 - INFO - codeparrot_training - Step 45208: {'lr': 1.1325104853911555e-05, 'samples': 23147008, 'steps': 45208, 'batch_loss/train': 0.665014612255618} +12/28/2021 22:01:07 - INFO - codeparrot_training - Step 45209: {'lr': 1.132041467115394e-05, 'samples': 23147520, 'steps': 45209, 'batch_loss/train': 0.7433960535563529} +12/28/2021 22:01:18 - INFO - codeparrot_training - Step 45210: {'lr': 1.1315725437296227e-05, 'samples': 23148032, 'steps': 45210, 'batch_loss/train': 0.6775140275713056} +12/28/2021 22:01:30 - INFO - codeparrot_training - Step 45211: {'lr': 1.1311037152357068e-05, 'samples': 23148544, 'steps': 45211, 'batch_loss/train': 0.7709558987990022} +12/28/2021 22:01:41 - INFO - codeparrot_training - Step 45212: {'lr': 1.1306349816355116e-05, 'samples': 23149056, 'steps': 45212, 'batch_loss/train': 0.6586386940907687} +12/28/2021 22:01:51 - INFO - codeparrot_training - Step 45213: {'lr': 1.1301663429308967e-05, 'samples': 23149568, 'steps': 45213, 'batch_loss/train': 0.6813489380292594} +12/28/2021 22:02:02 - INFO - codeparrot_training - Step 45214: {'lr': 1.1296977991237328e-05, 'samples': 23150080, 'steps': 45214, 'batch_loss/train': 0.6840239576995373} +12/28/2021 22:02:16 - INFO - codeparrot_training - Step 45215: {'lr': 1.1292293502158712e-05, 'samples': 23150592, 'steps': 45215, 'batch_loss/train': 0.8789694784209132} +12/28/2021 22:02:26 - INFO - codeparrot_training - Step 45216: {'lr': 1.1287609962091827e-05, 'samples': 23151104, 'steps': 45216, 'batch_loss/train': 0.7519833836704493} +12/28/2021 22:02:37 - INFO - codeparrot_training - Step 45217: {'lr': 1.1282927371055295e-05, 'samples': 23151616, 'steps': 45217, 'batch_loss/train': 0.7265521013177931} +12/28/2021 22:02:49 - INFO - codeparrot_training - Step 45218: {'lr': 1.1278245729067688e-05, 'samples': 23152128, 'steps': 45218, 'batch_loss/train': 0.7645460017956793} +12/28/2021 22:03:00 - INFO - codeparrot_training - Step 45219: {'lr': 1.1273565036147598e-05, 'samples': 23152640, 'steps': 45219, 'batch_loss/train': 0.7092871246859431} +12/28/2021 22:03:11 - INFO - codeparrot_training - Step 45220: {'lr': 1.1268885292313735e-05, 'samples': 23153152, 'steps': 45220, 'batch_loss/train': 0.7705654481542297} +12/28/2021 22:03:24 - INFO - codeparrot_training - Step 45221: {'lr': 1.1264206497584612e-05, 'samples': 23153664, 'steps': 45221, 'batch_loss/train': 0.7733873687684536} +12/28/2021 22:03:35 - INFO - codeparrot_training - Step 45222: {'lr': 1.125952865197885e-05, 'samples': 23154176, 'steps': 45222, 'batch_loss/train': 0.71188008133322} +12/28/2021 22:03:46 - INFO - codeparrot_training - Step 45223: {'lr': 1.1254851755515105e-05, 'samples': 23154688, 'steps': 45223, 'batch_loss/train': 0.7335936911404133} +12/28/2021 22:03:56 - INFO - codeparrot_training - Step 45224: {'lr': 1.1250175808211888e-05, 'samples': 23155200, 'steps': 45224, 'batch_loss/train': 0.7349751787260175} +12/28/2021 22:04:08 - INFO - codeparrot_training - Step 45225: {'lr': 1.1245500810087822e-05, 'samples': 23155712, 'steps': 45225, 'batch_loss/train': 0.7380467001348734} +12/28/2021 22:04:19 - INFO - codeparrot_training - Step 45226: {'lr': 1.1240826761161505e-05, 'samples': 23156224, 'steps': 45226, 'batch_loss/train': 0.7355642057955265} +12/28/2021 22:04:29 - INFO - codeparrot_training - Step 45227: {'lr': 1.1236153661451477e-05, 'samples': 23156736, 'steps': 45227, 'batch_loss/train': 0.6533847944810987} +12/28/2021 22:04:42 - INFO - codeparrot_training - Step 45228: {'lr': 1.1231481510976388e-05, 'samples': 23157248, 'steps': 45228, 'batch_loss/train': 0.8815470701083541} +12/28/2021 22:04:52 - INFO - codeparrot_training - Step 45229: {'lr': 1.1226810309754782e-05, 'samples': 23157760, 'steps': 45229, 'batch_loss/train': 0.728433322859928} +12/28/2021 22:05:03 - INFO - codeparrot_training - Step 45230: {'lr': 1.1222140057805169e-05, 'samples': 23158272, 'steps': 45230, 'batch_loss/train': 0.6192574314773083} +12/28/2021 22:05:15 - INFO - codeparrot_training - Step 45231: {'lr': 1.121747075514623e-05, 'samples': 23158784, 'steps': 45231, 'batch_loss/train': 0.5502081043669023} +12/28/2021 22:05:26 - INFO - codeparrot_training - Step 45232: {'lr': 1.1212802401796424e-05, 'samples': 23159296, 'steps': 45232, 'batch_loss/train': 0.7050936496816576} +12/28/2021 22:05:37 - INFO - codeparrot_training - Step 45233: {'lr': 1.1208134997774344e-05, 'samples': 23159808, 'steps': 45233, 'batch_loss/train': 0.7237762650474906} +12/28/2021 22:05:51 - INFO - codeparrot_training - Step 45234: {'lr': 1.1203468543098589e-05, 'samples': 23160320, 'steps': 45234, 'batch_loss/train': 0.6724340398795903} +12/28/2021 22:06:02 - INFO - codeparrot_training - Step 45235: {'lr': 1.119880303778767e-05, 'samples': 23160832, 'steps': 45235, 'batch_loss/train': 0.8753606639802456} +12/28/2021 22:06:13 - INFO - codeparrot_training - Step 45236: {'lr': 1.1194138481860127e-05, 'samples': 23161344, 'steps': 45236, 'batch_loss/train': 0.6808420117013156} +12/28/2021 22:06:23 - INFO - codeparrot_training - Step 45237: {'lr': 1.1189474875334533e-05, 'samples': 23161856, 'steps': 45237, 'batch_loss/train': 0.6573677156120539} +12/28/2021 22:06:36 - INFO - codeparrot_training - Step 45238: {'lr': 1.1184812218229395e-05, 'samples': 23162368, 'steps': 45238, 'batch_loss/train': 0.9554148181341588} +12/28/2021 22:06:47 - INFO - codeparrot_training - Step 45239: {'lr': 1.1180150510563287e-05, 'samples': 23162880, 'steps': 45239, 'batch_loss/train': 1.0799186201766133} +12/28/2021 22:06:57 - INFO - codeparrot_training - Step 45240: {'lr': 1.1175489752354772e-05, 'samples': 23163392, 'steps': 45240, 'batch_loss/train': 0.8932139687240124} +12/28/2021 22:07:09 - INFO - codeparrot_training - Step 45241: {'lr': 1.117082994362223e-05, 'samples': 23163904, 'steps': 45241, 'batch_loss/train': 0.6179465335444547} +12/28/2021 22:07:20 - INFO - codeparrot_training - Step 45242: {'lr': 1.1166171084384364e-05, 'samples': 23164416, 'steps': 45242, 'batch_loss/train': 0.6919245892204344} +12/28/2021 22:07:31 - INFO - codeparrot_training - Step 45243: {'lr': 1.1161513174659632e-05, 'samples': 23164928, 'steps': 45243, 'batch_loss/train': 0.6826542783528566} +12/28/2021 22:07:41 - INFO - codeparrot_training - Step 45244: {'lr': 1.1156856214466465e-05, 'samples': 23165440, 'steps': 45244, 'batch_loss/train': 0.7390599376522005} +12/28/2021 22:07:55 - INFO - codeparrot_training - Step 45245: {'lr': 1.1152200203823515e-05, 'samples': 23165952, 'steps': 45245, 'batch_loss/train': 0.7221329691819847} +12/28/2021 22:08:06 - INFO - codeparrot_training - Step 45246: {'lr': 1.1147545142749239e-05, 'samples': 23166464, 'steps': 45246, 'batch_loss/train': 0.7685714890249074} +12/28/2021 22:08:17 - INFO - codeparrot_training - Step 45247: {'lr': 1.1142891031262121e-05, 'samples': 23166976, 'steps': 45247, 'batch_loss/train': 0.8230871669948101} +12/28/2021 22:08:29 - INFO - codeparrot_training - Step 45248: {'lr': 1.1138237869380647e-05, 'samples': 23167488, 'steps': 45248, 'batch_loss/train': 0.7140512084588408} +12/28/2021 22:08:40 - INFO - codeparrot_training - Step 45249: {'lr': 1.1133585657123385e-05, 'samples': 23168000, 'steps': 45249, 'batch_loss/train': 0.6738131381571293} +12/28/2021 22:08:50 - INFO - codeparrot_training - Step 45250: {'lr': 1.1128934394508794e-05, 'samples': 23168512, 'steps': 45250, 'batch_loss/train': 0.7170391110703349} +12/28/2021 22:09:04 - INFO - codeparrot_training - Step 45251: {'lr': 1.1124284081555359e-05, 'samples': 23169024, 'steps': 45251, 'batch_loss/train': 0.6984525015577674} +12/28/2021 22:09:15 - INFO - codeparrot_training - Step 45252: {'lr': 1.1119634718281563e-05, 'samples': 23169536, 'steps': 45252, 'batch_loss/train': 0.7021812271559611} +12/28/2021 22:09:25 - INFO - codeparrot_training - Step 45253: {'lr': 1.1114986304705892e-05, 'samples': 23170048, 'steps': 45253, 'batch_loss/train': 0.6724969043862075} +12/28/2021 22:09:36 - INFO - codeparrot_training - Step 45254: {'lr': 1.1110338840846862e-05, 'samples': 23170560, 'steps': 45254, 'batch_loss/train': 0.7499102395959198} +12/28/2021 22:09:48 - INFO - codeparrot_training - Step 45255: {'lr': 1.1105692326722955e-05, 'samples': 23171072, 'steps': 45255, 'batch_loss/train': 0.7043064082972705} +12/28/2021 22:09:59 - INFO - codeparrot_training - Step 45256: {'lr': 1.1101046762352546e-05, 'samples': 23171584, 'steps': 45256, 'batch_loss/train': 0.7961749751702882} +12/28/2021 22:10:09 - INFO - codeparrot_training - Step 45257: {'lr': 1.1096402147754176e-05, 'samples': 23172096, 'steps': 45257, 'batch_loss/train': 0.6600732328370214} +12/28/2021 22:10:21 - INFO - codeparrot_training - Step 45258: {'lr': 1.1091758482946356e-05, 'samples': 23172608, 'steps': 45258, 'batch_loss/train': 0.6515343161299825} +12/28/2021 22:10:32 - INFO - codeparrot_training - Step 45259: {'lr': 1.1087115767947436e-05, 'samples': 23173120, 'steps': 45259, 'batch_loss/train': 0.6001659393077716} +12/28/2021 22:10:43 - INFO - codeparrot_training - Step 45260: {'lr': 1.1082474002776011e-05, 'samples': 23173632, 'steps': 45260, 'batch_loss/train': 0.7552105141803622} +12/28/2021 22:10:57 - INFO - codeparrot_training - Step 45261: {'lr': 1.1077833187450398e-05, 'samples': 23174144, 'steps': 45261, 'batch_loss/train': 0.5725286195520312} +12/28/2021 22:11:07 - INFO - codeparrot_training - Step 45262: {'lr': 1.107319332198911e-05, 'samples': 23174656, 'steps': 45262, 'batch_loss/train': 0.7894206577911973} +12/28/2021 22:11:18 - INFO - codeparrot_training - Step 45263: {'lr': 1.1068554406410608e-05, 'samples': 23175168, 'steps': 45263, 'batch_loss/train': 0.8380233468487859} +12/28/2021 22:11:29 - INFO - codeparrot_training - Step 45264: {'lr': 1.106391644073329e-05, 'samples': 23175680, 'steps': 45264, 'batch_loss/train': 0.7534953807480633} +12/28/2021 22:11:41 - INFO - codeparrot_training - Step 45265: {'lr': 1.1059279424975644e-05, 'samples': 23176192, 'steps': 45265, 'batch_loss/train': 0.9111900036223233} +12/28/2021 22:11:52 - INFO - codeparrot_training - Step 45266: {'lr': 1.105464335915607e-05, 'samples': 23176704, 'steps': 45266, 'batch_loss/train': 0.7783919349312782} +12/28/2021 22:12:02 - INFO - codeparrot_training - Step 45267: {'lr': 1.1050008243293026e-05, 'samples': 23177216, 'steps': 45267, 'batch_loss/train': 0.7054474800825119} +12/28/2021 22:12:16 - INFO - codeparrot_training - Step 45268: {'lr': 1.1045374077404913e-05, 'samples': 23177728, 'steps': 45268, 'batch_loss/train': 0.7188941463828087} +12/28/2021 22:12:27 - INFO - codeparrot_training - Step 45269: {'lr': 1.1040740861510218e-05, 'samples': 23178240, 'steps': 45269, 'batch_loss/train': 0.8300011469982564} +12/28/2021 22:12:38 - INFO - codeparrot_training - Step 45270: {'lr': 1.103610859562723e-05, 'samples': 23178752, 'steps': 45270, 'batch_loss/train': 0.7060732033569366} +12/28/2021 22:12:48 - INFO - codeparrot_training - Step 45271: {'lr': 1.1031477279774466e-05, 'samples': 23179264, 'steps': 45271, 'batch_loss/train': 0.7719943816773593} +12/28/2021 22:13:00 - INFO - codeparrot_training - Step 45272: {'lr': 1.1026846913970351e-05, 'samples': 23179776, 'steps': 45272, 'batch_loss/train': 0.6992597542703152} +12/28/2021 22:13:11 - INFO - codeparrot_training - Step 45273: {'lr': 1.1022217498233234e-05, 'samples': 23180288, 'steps': 45273, 'batch_loss/train': 1.4651657892391086} +12/28/2021 22:13:22 - INFO - codeparrot_training - Step 45274: {'lr': 1.1017589032581543e-05, 'samples': 23180800, 'steps': 45274, 'batch_loss/train': 0.7581694712862372} +12/28/2021 22:13:34 - INFO - codeparrot_training - Step 45275: {'lr': 1.101296151703371e-05, 'samples': 23181312, 'steps': 45275, 'batch_loss/train': 0.6235096626915038} +12/28/2021 22:13:44 - INFO - codeparrot_training - Step 45276: {'lr': 1.100833495160808e-05, 'samples': 23181824, 'steps': 45276, 'batch_loss/train': 0.7271245773881674} +12/28/2021 22:13:55 - INFO - codeparrot_training - Step 45277: {'lr': 1.100370933632308e-05, 'samples': 23182336, 'steps': 45277, 'batch_loss/train': 0.7586483911727555} +12/28/2021 22:14:07 - INFO - codeparrot_training - Step 45278: {'lr': 1.0999084671197063e-05, 'samples': 23182848, 'steps': 45278, 'batch_loss/train': 0.7134393873857334} +12/28/2021 22:14:18 - INFO - codeparrot_training - Step 45279: {'lr': 1.0994460956248481e-05, 'samples': 23183360, 'steps': 45279, 'batch_loss/train': 0.632844238076359} +12/28/2021 22:14:28 - INFO - codeparrot_training - Step 45280: {'lr': 1.0989838191495655e-05, 'samples': 23183872, 'steps': 45280, 'batch_loss/train': 0.7187032364308834} +12/28/2021 22:14:39 - INFO - codeparrot_training - Step 45281: {'lr': 1.0985216376956985e-05, 'samples': 23184384, 'steps': 45281, 'batch_loss/train': 0.7205729158595204} +12/28/2021 22:14:53 - INFO - codeparrot_training - Step 45282: {'lr': 1.0980595512650848e-05, 'samples': 23184896, 'steps': 45282, 'batch_loss/train': 0.7506268839351833} +12/28/2021 22:15:04 - INFO - codeparrot_training - Step 45283: {'lr': 1.0975975598595617e-05, 'samples': 23185408, 'steps': 45283, 'batch_loss/train': 0.7629040649626404} +12/28/2021 22:15:14 - INFO - codeparrot_training - Step 45284: {'lr': 1.0971356634809692e-05, 'samples': 23185920, 'steps': 45284, 'batch_loss/train': 0.6816213498823345} +12/28/2021 22:15:27 - INFO - codeparrot_training - Step 45285: {'lr': 1.0966738621311311e-05, 'samples': 23186432, 'steps': 45285, 'batch_loss/train': 0.6695514561142772} +12/28/2021 22:15:38 - INFO - codeparrot_training - Step 45286: {'lr': 1.0962121558119015e-05, 'samples': 23186944, 'steps': 45286, 'batch_loss/train': 0.7937301495112479} +12/28/2021 22:15:48 - INFO - codeparrot_training - Step 45287: {'lr': 1.0957505445251009e-05, 'samples': 23187456, 'steps': 45287, 'batch_loss/train': 0.6989519577473402} +12/28/2021 22:16:00 - INFO - codeparrot_training - Step 45288: {'lr': 1.0952890282725724e-05, 'samples': 23187968, 'steps': 45288, 'batch_loss/train': 1.0939832422882318} +12/28/2021 22:16:11 - INFO - codeparrot_training - Step 45289: {'lr': 1.0948276070561481e-05, 'samples': 23188480, 'steps': 45289, 'batch_loss/train': 0.8505614250898361} +12/28/2021 22:16:22 - INFO - codeparrot_training - Step 45290: {'lr': 1.0943662808776622e-05, 'samples': 23188992, 'steps': 45290, 'batch_loss/train': 0.7349847466684878} +12/28/2021 22:16:32 - INFO - codeparrot_training - Step 45291: {'lr': 1.0939050497389497e-05, 'samples': 23189504, 'steps': 45291, 'batch_loss/train': 0.7196546790655702} +12/28/2021 22:16:46 - INFO - codeparrot_training - Step 45292: {'lr': 1.093443913641845e-05, 'samples': 23190016, 'steps': 45292, 'batch_loss/train': 0.6784994169138372} +12/28/2021 22:16:57 - INFO - codeparrot_training - Step 45293: {'lr': 1.0929828725881803e-05, 'samples': 23190528, 'steps': 45293, 'batch_loss/train': 0.5754673110204749} +12/28/2021 22:17:08 - INFO - codeparrot_training - Step 45294: {'lr': 1.0925219265797897e-05, 'samples': 23191040, 'steps': 45294, 'batch_loss/train': 0.4961165436543524} +12/28/2021 22:17:20 - INFO - codeparrot_training - Step 45295: {'lr': 1.0920610756185056e-05, 'samples': 23191552, 'steps': 45295, 'batch_loss/train': 0.6434111797716469} +12/28/2021 22:17:30 - INFO - codeparrot_training - Step 45296: {'lr': 1.0916003197061541e-05, 'samples': 23192064, 'steps': 45296, 'batch_loss/train': 0.8592402385547757} +12/28/2021 22:17:41 - INFO - codeparrot_training - Step 45297: {'lr': 1.0911396588445755e-05, 'samples': 23192576, 'steps': 45297, 'batch_loss/train': 0.5038390303379856} +12/28/2021 22:17:53 - INFO - codeparrot_training - Step 45298: {'lr': 1.0906790930356015e-05, 'samples': 23193088, 'steps': 45298, 'batch_loss/train': 0.6511128675192595} +12/28/2021 22:18:04 - INFO - codeparrot_training - Step 45299: {'lr': 1.0902186222810557e-05, 'samples': 23193600, 'steps': 45299, 'batch_loss/train': 0.9157065879553556} +12/28/2021 22:18:15 - INFO - codeparrot_training - Step 45300: {'lr': 1.0897582465827698e-05, 'samples': 23194112, 'steps': 45300, 'batch_loss/train': 0.7176357582211494} +12/28/2021 22:18:25 - INFO - codeparrot_training - Step 45301: {'lr': 1.0892979659425844e-05, 'samples': 23194624, 'steps': 45301, 'batch_loss/train': 0.7360463039949536} +12/28/2021 22:18:39 - INFO - codeparrot_training - Step 45302: {'lr': 1.0888377803623172e-05, 'samples': 23195136, 'steps': 45302, 'batch_loss/train': 0.6681429732125252} +12/28/2021 22:18:50 - INFO - codeparrot_training - Step 45303: {'lr': 1.088377689843803e-05, 'samples': 23195648, 'steps': 45303, 'batch_loss/train': 0.6851050448603928} +12/28/2021 22:19:01 - INFO - codeparrot_training - Step 45304: {'lr': 1.0879176943888709e-05, 'samples': 23196160, 'steps': 45304, 'batch_loss/train': 0.677665734430775} +12/28/2021 22:19:13 - INFO - codeparrot_training - Step 45305: {'lr': 1.0874577939993497e-05, 'samples': 23196672, 'steps': 45305, 'batch_loss/train': 0.7096738638356328} +12/28/2021 22:19:23 - INFO - codeparrot_training - Step 45306: {'lr': 1.086997988677066e-05, 'samples': 23197184, 'steps': 45306, 'batch_loss/train': 0.6497659147717059} +12/28/2021 22:19:34 - INFO - codeparrot_training - Step 45307: {'lr': 1.0865382784238487e-05, 'samples': 23197696, 'steps': 45307, 'batch_loss/train': 0.6789854351663962} +12/28/2021 22:19:46 - INFO - codeparrot_training - Step 45308: {'lr': 1.086078663241527e-05, 'samples': 23198208, 'steps': 45308, 'batch_loss/train': 0.7247230112552643} +12/28/2021 22:19:57 - INFO - codeparrot_training - Step 45309: {'lr': 1.0856191431319273e-05, 'samples': 23198720, 'steps': 45309, 'batch_loss/train': 0.6887982254847884} +12/28/2021 22:20:08 - INFO - codeparrot_training - Step 45310: {'lr': 1.0851597180968787e-05, 'samples': 23199232, 'steps': 45310, 'batch_loss/train': 0.5507680092414375} +12/28/2021 22:20:21 - INFO - codeparrot_training - Step 45311: {'lr': 1.084700388138199e-05, 'samples': 23199744, 'steps': 45311, 'batch_loss/train': 0.7541107328142971} +12/28/2021 22:20:32 - INFO - codeparrot_training - Step 45312: {'lr': 1.0842411532577229e-05, 'samples': 23200256, 'steps': 45312, 'batch_loss/train': 0.7128020357340574} +12/28/2021 22:20:43 - INFO - codeparrot_training - Step 45313: {'lr': 1.0837820134572767e-05, 'samples': 23200768, 'steps': 45313, 'batch_loss/train': 0.7255416731350124} +12/28/2021 22:20:53 - INFO - codeparrot_training - Step 45314: {'lr': 1.0833229687386814e-05, 'samples': 23201280, 'steps': 45314, 'batch_loss/train': 0.6962267503840849} +12/28/2021 22:21:06 - INFO - codeparrot_training - Step 45315: {'lr': 1.082864019103763e-05, 'samples': 23201792, 'steps': 45315, 'batch_loss/train': 0.7338845045305789} +12/28/2021 22:21:17 - INFO - codeparrot_training - Step 45316: {'lr': 1.0824051645543453e-05, 'samples': 23202304, 'steps': 45316, 'batch_loss/train': 0.6128290402702987} +12/28/2021 22:21:27 - INFO - codeparrot_training - Step 45317: {'lr': 1.0819464050922545e-05, 'samples': 23202816, 'steps': 45317, 'batch_loss/train': 0.6620603036135435} +12/28/2021 22:21:40 - INFO - codeparrot_training - Step 45318: {'lr': 1.0814877407193142e-05, 'samples': 23203328, 'steps': 45318, 'batch_loss/train': 0.5543960845097899} +12/28/2021 22:21:50 - INFO - codeparrot_training - Step 45319: {'lr': 1.081029171437345e-05, 'samples': 23203840, 'steps': 45319, 'batch_loss/train': 0.6914405198767781} +12/28/2021 22:22:01 - INFO - codeparrot_training - Step 45320: {'lr': 1.0805706972481761e-05, 'samples': 23204352, 'steps': 45320, 'batch_loss/train': 0.5540552381426096} +12/28/2021 22:22:14 - INFO - codeparrot_training - Step 45321: {'lr': 1.0801123181536283e-05, 'samples': 23204864, 'steps': 45321, 'batch_loss/train': 1.1100015873089433} +12/28/2021 22:22:25 - INFO - codeparrot_training - Step 45322: {'lr': 1.0796540341555138e-05, 'samples': 23205376, 'steps': 45322, 'batch_loss/train': 0.6207844359159935} +12/28/2021 22:22:35 - INFO - codeparrot_training - Step 45323: {'lr': 1.0791958452556677e-05, 'samples': 23205888, 'steps': 45323, 'batch_loss/train': 0.7627575974911451} +12/28/2021 22:22:46 - INFO - codeparrot_training - Step 45324: {'lr': 1.0787377514559076e-05, 'samples': 23206400, 'steps': 45324, 'batch_loss/train': 0.7517069960013032} +12/28/2021 22:22:59 - INFO - codeparrot_training - Step 45325: {'lr': 1.0782797527580546e-05, 'samples': 23206912, 'steps': 45325, 'batch_loss/train': 0.732842265162617} +12/28/2021 22:23:09 - INFO - codeparrot_training - Step 45326: {'lr': 1.0778218491639236e-05, 'samples': 23207424, 'steps': 45326, 'batch_loss/train': 0.7318932809866965} +12/28/2021 22:23:20 - INFO - codeparrot_training - Step 45327: {'lr': 1.0773640406753465e-05, 'samples': 23207936, 'steps': 45327, 'batch_loss/train': 0.7285827691666782} +12/28/2021 22:23:34 - INFO - codeparrot_training - Step 45328: {'lr': 1.0769063272941331e-05, 'samples': 23208448, 'steps': 45328, 'batch_loss/train': 0.484386034484487} +12/28/2021 22:23:44 - INFO - codeparrot_training - Step 45329: {'lr': 1.0764487090221097e-05, 'samples': 23208960, 'steps': 45329, 'batch_loss/train': 0.7220790479332209} +12/28/2021 22:23:55 - INFO - codeparrot_training - Step 45330: {'lr': 1.0759911858610915e-05, 'samples': 23209472, 'steps': 45330, 'batch_loss/train': 0.7424742095172405} +12/28/2021 22:24:07 - INFO - codeparrot_training - Step 45331: {'lr': 1.0755337578128994e-05, 'samples': 23209984, 'steps': 45331, 'batch_loss/train': 0.6534157157875597} +12/28/2021 22:24:18 - INFO - codeparrot_training - Step 45332: {'lr': 1.075076424879351e-05, 'samples': 23210496, 'steps': 45332, 'batch_loss/train': 0.648250441532582} +12/28/2021 22:24:29 - INFO - codeparrot_training - Step 45333: {'lr': 1.0746191870622646e-05, 'samples': 23211008, 'steps': 45333, 'batch_loss/train': 0.7088682139292359} +12/28/2021 22:24:39 - INFO - codeparrot_training - Step 45334: {'lr': 1.074162044363461e-05, 'samples': 23211520, 'steps': 45334, 'batch_loss/train': 0.6621065251529217} +12/28/2021 22:24:51 - INFO - codeparrot_training - Step 45335: {'lr': 1.0737049967847523e-05, 'samples': 23212032, 'steps': 45335, 'batch_loss/train': 0.8325257394462824} +12/28/2021 22:25:02 - INFO - codeparrot_training - Step 45336: {'lr': 1.0732480443279624e-05, 'samples': 23212544, 'steps': 45336, 'batch_loss/train': 0.7722415053285658} +12/28/2021 22:25:13 - INFO - codeparrot_training - Step 45337: {'lr': 1.0727911869948981e-05, 'samples': 23213056, 'steps': 45337, 'batch_loss/train': 0.6735509932041168} +12/28/2021 22:25:25 - INFO - codeparrot_training - Step 45338: {'lr': 1.072334424787383e-05, 'samples': 23213568, 'steps': 45338, 'batch_loss/train': 0.8288606273708865} +12/28/2021 22:25:36 - INFO - codeparrot_training - Step 45339: {'lr': 1.0718777577072348e-05, 'samples': 23214080, 'steps': 45339, 'batch_loss/train': 0.7626053616404533} +12/28/2021 22:25:46 - INFO - codeparrot_training - Step 45340: {'lr': 1.0714211857562634e-05, 'samples': 23214592, 'steps': 45340, 'batch_loss/train': 0.7257033863570541} +12/28/2021 22:26:01 - INFO - codeparrot_training - Step 45341: {'lr': 1.0709647089362867e-05, 'samples': 23215104, 'steps': 45341, 'batch_loss/train': 0.3537993535865098} +12/28/2021 22:26:12 - INFO - codeparrot_training - Step 45342: {'lr': 1.0705083272491201e-05, 'samples': 23215616, 'steps': 45342, 'batch_loss/train': 0.6063477366697043} +12/28/2021 22:26:23 - INFO - codeparrot_training - Step 45343: {'lr': 1.0700520406965758e-05, 'samples': 23216128, 'steps': 45343, 'batch_loss/train': 0.9455185933038592} +12/28/2021 22:26:33 - INFO - codeparrot_training - Step 45344: {'lr': 1.0695958492804664e-05, 'samples': 23216640, 'steps': 45344, 'batch_loss/train': 0.6993869263678789} +12/28/2021 22:26:45 - INFO - codeparrot_training - Step 45345: {'lr': 1.0691397530026126e-05, 'samples': 23217152, 'steps': 45345, 'batch_loss/train': 0.6959548979066312} +12/28/2021 22:26:56 - INFO - codeparrot_training - Step 45346: {'lr': 1.0686837518648213e-05, 'samples': 23217664, 'steps': 45346, 'batch_loss/train': 0.6812650999054313} +12/28/2021 22:27:07 - INFO - codeparrot_training - Step 45347: {'lr': 1.0682278458689048e-05, 'samples': 23218176, 'steps': 45347, 'batch_loss/train': 0.6890046981861815} +12/28/2021 22:27:19 - INFO - codeparrot_training - Step 45348: {'lr': 1.0677720350166814e-05, 'samples': 23218688, 'steps': 45348, 'batch_loss/train': 0.723866836167872} +12/28/2021 22:27:30 - INFO - codeparrot_training - Step 45349: {'lr': 1.0673163193099606e-05, 'samples': 23219200, 'steps': 45349, 'batch_loss/train': 0.796257097274065} +12/28/2021 22:27:40 - INFO - codeparrot_training - Step 45350: {'lr': 1.0668606987505546e-05, 'samples': 23219712, 'steps': 45350, 'batch_loss/train': 0.5265887342393398} +12/28/2021 22:27:51 - INFO - codeparrot_training - Step 45351: {'lr': 1.0664051733402708e-05, 'samples': 23220224, 'steps': 45351, 'batch_loss/train': 0.8924901681020856} +12/28/2021 22:28:05 - INFO - codeparrot_training - Step 45352: {'lr': 1.0659497430809184e-05, 'samples': 23220736, 'steps': 45352, 'batch_loss/train': 0.7565289498306811} +12/28/2021 22:28:16 - INFO - codeparrot_training - Step 45353: {'lr': 1.0654944079743211e-05, 'samples': 23221248, 'steps': 45353, 'batch_loss/train': 0.6236851648427546} +12/28/2021 22:28:26 - INFO - codeparrot_training - Step 45354: {'lr': 1.0650391680222776e-05, 'samples': 23221760, 'steps': 45354, 'batch_loss/train': 0.6939378841780126} +12/28/2021 22:28:39 - INFO - codeparrot_training - Step 45355: {'lr': 1.0645840232265973e-05, 'samples': 23222272, 'steps': 45355, 'batch_loss/train': 0.7403922802768648} +12/28/2021 22:28:49 - INFO - codeparrot_training - Step 45356: {'lr': 1.0641289735890985e-05, 'samples': 23222784, 'steps': 45356, 'batch_loss/train': 0.7942587197758257} +12/28/2021 22:29:00 - INFO - codeparrot_training - Step 45357: {'lr': 1.0636740191115824e-05, 'samples': 23223296, 'steps': 45357, 'batch_loss/train': 0.7010393578093499} +12/28/2021 22:29:12 - INFO - codeparrot_training - Step 45358: {'lr': 1.0632191597958613e-05, 'samples': 23223808, 'steps': 45358, 'batch_loss/train': 0.6156602972187102} +12/28/2021 22:29:23 - INFO - codeparrot_training - Step 45359: {'lr': 1.062764395643742e-05, 'samples': 23224320, 'steps': 45359, 'batch_loss/train': 0.6692573865875602} +12/28/2021 22:29:33 - INFO - codeparrot_training - Step 45360: {'lr': 1.0623097266570347e-05, 'samples': 23224832, 'steps': 45360, 'batch_loss/train': 0.8730495180934668} +12/28/2021 22:29:44 - INFO - codeparrot_training - Step 45361: {'lr': 1.061855152837543e-05, 'samples': 23225344, 'steps': 45361, 'batch_loss/train': 0.7029155241325498} +12/28/2021 22:29:58 - INFO - codeparrot_training - Step 45362: {'lr': 1.0614006741870797e-05, 'samples': 23225856, 'steps': 45362, 'batch_loss/train': 0.7678727144375443} +12/28/2021 22:30:09 - INFO - codeparrot_training - Step 45363: {'lr': 1.060946290707443e-05, 'samples': 23226368, 'steps': 45363, 'batch_loss/train': 0.6651145978830755} +12/28/2021 22:30:19 - INFO - codeparrot_training - Step 45364: {'lr': 1.0604920024004482e-05, 'samples': 23226880, 'steps': 45364, 'batch_loss/train': 0.8230637712404132} +12/28/2021 22:30:31 - INFO - codeparrot_training - Step 45365: {'lr': 1.0600378092679026e-05, 'samples': 23227392, 'steps': 45365, 'batch_loss/train': 0.7300275946035981} +12/28/2021 22:30:42 - INFO - codeparrot_training - Step 45366: {'lr': 1.0595837113116014e-05, 'samples': 23227904, 'steps': 45366, 'batch_loss/train': 0.6930468659847975} +12/28/2021 22:30:53 - INFO - codeparrot_training - Step 45367: {'lr': 1.0591297085333573e-05, 'samples': 23228416, 'steps': 45367, 'batch_loss/train': 0.8013980253599584} +12/28/2021 22:31:07 - INFO - codeparrot_training - Step 45368: {'lr': 1.0586758009349745e-05, 'samples': 23228928, 'steps': 45368, 'batch_loss/train': 0.7181337913498282} +12/28/2021 22:31:17 - INFO - codeparrot_training - Step 45369: {'lr': 1.0582219885182542e-05, 'samples': 23229440, 'steps': 45369, 'batch_loss/train': 0.7100527049042284} +12/28/2021 22:31:28 - INFO - codeparrot_training - Step 45370: {'lr': 1.0577682712850061e-05, 'samples': 23229952, 'steps': 45370, 'batch_loss/train': 0.665957611054182} +12/28/2021 22:31:38 - INFO - codeparrot_training - Step 45371: {'lr': 1.0573146492370289e-05, 'samples': 23230464, 'steps': 45371, 'batch_loss/train': 0.7725987378507853} +12/28/2021 22:31:51 - INFO - codeparrot_training - Step 45372: {'lr': 1.0568611223761293e-05, 'samples': 23230976, 'steps': 45372, 'batch_loss/train': 0.6724550444632769} +12/28/2021 22:32:02 - INFO - codeparrot_training - Step 45373: {'lr': 1.0564076907041087e-05, 'samples': 23231488, 'steps': 45373, 'batch_loss/train': 0.7463839296251535} +12/28/2021 22:32:12 - INFO - codeparrot_training - Step 45374: {'lr': 1.0559543542227711e-05, 'samples': 23232000, 'steps': 45374, 'batch_loss/train': 0.6817212644964457} +12/28/2021 22:32:24 - INFO - codeparrot_training - Step 45375: {'lr': 1.0555011129339181e-05, 'samples': 23232512, 'steps': 45375, 'batch_loss/train': 0.34417605539783835} +12/28/2021 22:32:35 - INFO - codeparrot_training - Step 45376: {'lr': 1.0550479668393537e-05, 'samples': 23233024, 'steps': 45376, 'batch_loss/train': 0.7010289751924574} +12/28/2021 22:32:46 - INFO - codeparrot_training - Step 45377: {'lr': 1.0545949159408762e-05, 'samples': 23233536, 'steps': 45377, 'batch_loss/train': 0.8255411572754383} +12/28/2021 22:32:58 - INFO - codeparrot_training - Step 45378: {'lr': 1.054141960240282e-05, 'samples': 23234048, 'steps': 45378, 'batch_loss/train': 0.7538873115554452} +12/28/2021 22:33:09 - INFO - codeparrot_training - Step 45379: {'lr': 1.0536890997393855e-05, 'samples': 23234560, 'steps': 45379, 'batch_loss/train': 0.7663059572223574} +12/28/2021 22:33:19 - INFO - codeparrot_training - Step 45380: {'lr': 1.0532363344399776e-05, 'samples': 23235072, 'steps': 45380, 'batch_loss/train': 0.7639625910669565} +12/28/2021 22:33:30 - INFO - codeparrot_training - Step 45381: {'lr': 1.0527836643438537e-05, 'samples': 23235584, 'steps': 45381, 'batch_loss/train': 0.7372170723974705} +12/28/2021 22:33:44 - INFO - codeparrot_training - Step 45382: {'lr': 1.0523310894528292e-05, 'samples': 23236096, 'steps': 45382, 'batch_loss/train': 0.7838636082597077} +12/28/2021 22:33:54 - INFO - codeparrot_training - Step 45383: {'lr': 1.0518786097686889e-05, 'samples': 23236608, 'steps': 45383, 'batch_loss/train': 0.6557683162391186} +12/28/2021 22:34:05 - INFO - codeparrot_training - Step 45384: {'lr': 1.0514262252932395e-05, 'samples': 23237120, 'steps': 45384, 'batch_loss/train': 0.628497215511743} +12/28/2021 22:34:17 - INFO - codeparrot_training - Step 45385: {'lr': 1.0509739360282739e-05, 'samples': 23237632, 'steps': 45385, 'batch_loss/train': 0.6949268446769565} +12/28/2021 22:34:28 - INFO - codeparrot_training - Step 45386: {'lr': 1.0505217419755936e-05, 'samples': 23238144, 'steps': 45386, 'batch_loss/train': 0.7229671152308583} +12/28/2021 22:34:38 - INFO - codeparrot_training - Step 45387: {'lr': 1.0500696431369972e-05, 'samples': 23238656, 'steps': 45387, 'batch_loss/train': 0.6648648795671761} +12/28/2021 22:34:51 - INFO - codeparrot_training - Step 45388: {'lr': 1.049617639514283e-05, 'samples': 23239168, 'steps': 45388, 'batch_loss/train': 0.5769379602279514} +12/28/2021 22:35:01 - INFO - codeparrot_training - Step 45389: {'lr': 1.049165731109239e-05, 'samples': 23239680, 'steps': 45389, 'batch_loss/train': 0.7126280022785068} +12/28/2021 22:35:12 - INFO - codeparrot_training - Step 45390: {'lr': 1.0487139179236715e-05, 'samples': 23240192, 'steps': 45390, 'batch_loss/train': 0.6406337291700765} +12/28/2021 22:35:26 - INFO - codeparrot_training - Step 45391: {'lr': 1.0482621999593767e-05, 'samples': 23240704, 'steps': 45391, 'batch_loss/train': 0.7010802970035002} +12/28/2021 22:35:37 - INFO - codeparrot_training - Step 45392: {'lr': 1.0478105772181417e-05, 'samples': 23241216, 'steps': 45392, 'batch_loss/train': 0.6785030226456001} +12/28/2021 22:35:47 - INFO - codeparrot_training - Step 45393: {'lr': 1.0473590497017737e-05, 'samples': 23241728, 'steps': 45393, 'batch_loss/train': 0.6706264442764223} +12/28/2021 22:35:58 - INFO - codeparrot_training - Step 45394: {'lr': 1.0469076174120628e-05, 'samples': 23242240, 'steps': 45394, 'batch_loss/train': 0.7101914626546204} +12/28/2021 22:36:10 - INFO - codeparrot_training - Step 45395: {'lr': 1.0464562803507992e-05, 'samples': 23242752, 'steps': 45395, 'batch_loss/train': 0.7095421929843724} +12/28/2021 22:36:21 - INFO - codeparrot_training - Step 45396: {'lr': 1.0460050385197816e-05, 'samples': 23243264, 'steps': 45396, 'batch_loss/train': 0.7088321119081229} +12/28/2021 22:36:31 - INFO - codeparrot_training - Step 45397: {'lr': 1.0455538919208029e-05, 'samples': 23243776, 'steps': 45397, 'batch_loss/train': 0.6953708860091865} +12/28/2021 22:36:45 - INFO - codeparrot_training - Step 45398: {'lr': 1.045102840555659e-05, 'samples': 23244288, 'steps': 45398, 'batch_loss/train': 0.6995573155581951} +12/28/2021 22:36:56 - INFO - codeparrot_training - Step 45399: {'lr': 1.044651884426137e-05, 'samples': 23244800, 'steps': 45399, 'batch_loss/train': 0.7807993814349174} +12/28/2021 22:37:07 - INFO - codeparrot_training - Step 45400: {'lr': 1.0442010235340388e-05, 'samples': 23245312, 'steps': 45400, 'batch_loss/train': 0.6536472500301898} +12/28/2021 22:37:19 - INFO - codeparrot_training - Step 45401: {'lr': 1.0437502578811486e-05, 'samples': 23245824, 'steps': 45401, 'batch_loss/train': 0.7597985593602061} +12/28/2021 22:37:29 - INFO - codeparrot_training - Step 45402: {'lr': 1.0432995874692625e-05, 'samples': 23246336, 'steps': 45402, 'batch_loss/train': 0.7172085456550121} +12/28/2021 22:37:40 - INFO - codeparrot_training - Step 45403: {'lr': 1.042849012300176e-05, 'samples': 23246848, 'steps': 45403, 'batch_loss/train': 0.7548709074035287} +12/28/2021 22:37:52 - INFO - codeparrot_training - Step 45404: {'lr': 1.0423985323756685e-05, 'samples': 23247360, 'steps': 45404, 'batch_loss/train': 0.6475929458392784} +12/28/2021 22:38:03 - INFO - codeparrot_training - Step 45405: {'lr': 1.0419481476975439e-05, 'samples': 23247872, 'steps': 45405, 'batch_loss/train': 0.6416481880005449} +12/28/2021 22:38:13 - INFO - codeparrot_training - Step 45406: {'lr': 1.0414978582675871e-05, 'samples': 23248384, 'steps': 45406, 'batch_loss/train': 0.8667937167920172} +12/28/2021 22:38:24 - INFO - codeparrot_training - Step 45407: {'lr': 1.0410476640875827e-05, 'samples': 23248896, 'steps': 45407, 'batch_loss/train': 0.7248216252774} +12/28/2021 22:38:38 - INFO - codeparrot_training - Step 45408: {'lr': 1.0405975651593318e-05, 'samples': 23249408, 'steps': 45408, 'batch_loss/train': 0.7124078895431012} +12/28/2021 22:38:49 - INFO - codeparrot_training - Step 45409: {'lr': 1.0401475614846168e-05, 'samples': 23249920, 'steps': 45409, 'batch_loss/train': 0.7964088493026793} +12/28/2021 22:38:59 - INFO - codeparrot_training - Step 45410: {'lr': 1.0396976530652274e-05, 'samples': 23250432, 'steps': 45410, 'batch_loss/train': 0.6339516146108508} +12/28/2021 22:39:11 - INFO - codeparrot_training - Step 45411: {'lr': 1.0392478399029543e-05, 'samples': 23250944, 'steps': 45411, 'batch_loss/train': 0.7757703238166869} +12/28/2021 22:39:22 - INFO - codeparrot_training - Step 45412: {'lr': 1.0387981219995845e-05, 'samples': 23251456, 'steps': 45412, 'batch_loss/train': 0.6825442593544722} +12/28/2021 22:39:33 - INFO - codeparrot_training - Step 45413: {'lr': 1.0383484993569059e-05, 'samples': 23251968, 'steps': 45413, 'batch_loss/train': 0.7136816214770079} +12/28/2021 22:39:45 - INFO - codeparrot_training - Step 45414: {'lr': 1.0378989719767084e-05, 'samples': 23252480, 'steps': 45414, 'batch_loss/train': 0.6821950803278014} +12/28/2021 22:39:55 - INFO - codeparrot_training - Step 45415: {'lr': 1.0374495398607715e-05, 'samples': 23252992, 'steps': 45415, 'batch_loss/train': 0.7597128306515515} +12/28/2021 22:40:06 - INFO - codeparrot_training - Step 45416: {'lr': 1.0370002030108877e-05, 'samples': 23253504, 'steps': 45416, 'batch_loss/train': 0.7166423702146858} +12/28/2021 22:40:17 - INFO - codeparrot_training - Step 45417: {'lr': 1.0365509614288504e-05, 'samples': 23254016, 'steps': 45417, 'batch_loss/train': 0.6693125630263239} +12/28/2021 22:40:30 - INFO - codeparrot_training - Step 45418: {'lr': 1.0361018151164275e-05, 'samples': 23254528, 'steps': 45418, 'batch_loss/train': 0.7281099520623684} +12/28/2021 22:40:41 - INFO - codeparrot_training - Step 45419: {'lr': 1.0356527640754231e-05, 'samples': 23255040, 'steps': 45419, 'batch_loss/train': 0.8884070133790374} +12/28/2021 22:40:51 - INFO - codeparrot_training - Step 45420: {'lr': 1.0352038083076137e-05, 'samples': 23255552, 'steps': 45420, 'batch_loss/train': 1.2798552359454334} +12/28/2021 22:41:05 - INFO - codeparrot_training - Step 45421: {'lr': 1.0347549478147838e-05, 'samples': 23256064, 'steps': 45421, 'batch_loss/train': 0.6670440074522048} +12/28/2021 22:41:16 - INFO - codeparrot_training - Step 45422: {'lr': 1.034306182598721e-05, 'samples': 23256576, 'steps': 45422, 'batch_loss/train': 0.7143973768688738} +12/28/2021 22:41:26 - INFO - codeparrot_training - Step 45423: {'lr': 1.0338575126612043e-05, 'samples': 23257088, 'steps': 45423, 'batch_loss/train': 0.6840759888291359} +12/28/2021 22:41:37 - INFO - codeparrot_training - Step 45424: {'lr': 1.033408938004024e-05, 'samples': 23257600, 'steps': 45424, 'batch_loss/train': 0.7132362159900367} +12/28/2021 22:41:49 - INFO - codeparrot_training - Step 45425: {'lr': 1.0329604586289593e-05, 'samples': 23258112, 'steps': 45425, 'batch_loss/train': 0.7773328577168286} +12/28/2021 22:42:00 - INFO - codeparrot_training - Step 45426: {'lr': 1.0325120745377947e-05, 'samples': 23258624, 'steps': 45426, 'batch_loss/train': 0.7127693556249142} +12/28/2021 22:42:10 - INFO - codeparrot_training - Step 45427: {'lr': 1.0320637857323095e-05, 'samples': 23259136, 'steps': 45427, 'batch_loss/train': 0.6564622209407389} +12/28/2021 22:42:24 - INFO - codeparrot_training - Step 45428: {'lr': 1.0316155922142911e-05, 'samples': 23259648, 'steps': 45428, 'batch_loss/train': 0.756042345194146} +12/28/2021 22:42:35 - INFO - codeparrot_training - Step 45429: {'lr': 1.0311674939855187e-05, 'samples': 23260160, 'steps': 45429, 'batch_loss/train': 0.573965908784885} +12/28/2021 22:42:45 - INFO - codeparrot_training - Step 45430: {'lr': 1.0307194910477742e-05, 'samples': 23260672, 'steps': 45430, 'batch_loss/train': 0.6665816507302225} +12/28/2021 22:42:58 - INFO - codeparrot_training - Step 45431: {'lr': 1.030271583402842e-05, 'samples': 23261184, 'steps': 45431, 'batch_loss/train': 0.7720927456393838} +12/28/2021 22:43:08 - INFO - codeparrot_training - Step 45432: {'lr': 1.029823771052496e-05, 'samples': 23261696, 'steps': 45432, 'batch_loss/train': 0.7306536943651736} +12/28/2021 22:43:19 - INFO - codeparrot_training - Step 45433: {'lr': 1.0293760539985181e-05, 'samples': 23262208, 'steps': 45433, 'batch_loss/train': 0.7406465304084122} +12/28/2021 22:43:31 - INFO - codeparrot_training - Step 45434: {'lr': 1.0289284322426928e-05, 'samples': 23262720, 'steps': 45434, 'batch_loss/train': 0.7493166914209723} +12/28/2021 22:43:42 - INFO - codeparrot_training - Step 45435: {'lr': 1.0284809057867966e-05, 'samples': 23263232, 'steps': 45435, 'batch_loss/train': 0.6817869660444558} +12/28/2021 22:43:52 - INFO - codeparrot_training - Step 45436: {'lr': 1.0280334746326087e-05, 'samples': 23263744, 'steps': 45436, 'batch_loss/train': 0.6671408917754889} +12/28/2021 22:44:03 - INFO - codeparrot_training - Step 45437: {'lr': 1.0275861387819079e-05, 'samples': 23264256, 'steps': 45437, 'batch_loss/train': 0.7080861581489444} +12/28/2021 22:44:17 - INFO - codeparrot_training - Step 45438: {'lr': 1.027138898236471e-05, 'samples': 23264768, 'steps': 45438, 'batch_loss/train': 0.7339762854389846} +12/28/2021 22:44:27 - INFO - codeparrot_training - Step 45439: {'lr': 1.0266917529980824e-05, 'samples': 23265280, 'steps': 45439, 'batch_loss/train': 0.6934661772102118} +12/28/2021 22:44:38 - INFO - codeparrot_training - Step 45440: {'lr': 1.0262447030685156e-05, 'samples': 23265792, 'steps': 45440, 'batch_loss/train': 0.7149080624803901} +12/28/2021 22:44:50 - INFO - codeparrot_training - Step 45441: {'lr': 1.0257977484495418e-05, 'samples': 23266304, 'steps': 45441, 'batch_loss/train': 0.6664924152137246} +12/28/2021 22:45:01 - INFO - codeparrot_training - Step 45442: {'lr': 1.0253508891429452e-05, 'samples': 23266816, 'steps': 45442, 'batch_loss/train': 0.7770499708130956} +12/28/2021 22:45:11 - INFO - codeparrot_training - Step 45443: {'lr': 1.0249041251505082e-05, 'samples': 23267328, 'steps': 45443, 'batch_loss/train': 0.6752933119423687} +12/28/2021 22:45:23 - INFO - codeparrot_training - Step 45444: {'lr': 1.02445745647399e-05, 'samples': 23267840, 'steps': 45444, 'batch_loss/train': 0.8031979193910956} +12/28/2021 22:45:34 - INFO - codeparrot_training - Step 45445: {'lr': 1.0240108831151812e-05, 'samples': 23268352, 'steps': 45445, 'batch_loss/train': 0.6415101177990437} +12/28/2021 22:45:45 - INFO - codeparrot_training - Step 45446: {'lr': 1.0235644050758552e-05, 'samples': 23268864, 'steps': 45446, 'batch_loss/train': 0.6904356991872191} +12/28/2021 22:45:55 - INFO - codeparrot_training - Step 45447: {'lr': 1.0231180223577802e-05, 'samples': 23269376, 'steps': 45447, 'batch_loss/train': 0.6995802614837885} +12/28/2021 22:46:09 - INFO - codeparrot_training - Step 45448: {'lr': 1.0226717349627323e-05, 'samples': 23269888, 'steps': 45448, 'batch_loss/train': 0.8386504200752825} +12/28/2021 22:46:20 - INFO - codeparrot_training - Step 45449: {'lr': 1.022225542892491e-05, 'samples': 23270400, 'steps': 45449, 'batch_loss/train': 0.6644402893725783} +12/28/2021 22:46:31 - INFO - codeparrot_training - Step 45450: {'lr': 1.0217794461488267e-05, 'samples': 23270912, 'steps': 45450, 'batch_loss/train': 0.7123370412737131} +12/28/2021 22:46:43 - INFO - codeparrot_training - Step 45451: {'lr': 1.0213334447335132e-05, 'samples': 23271424, 'steps': 45451, 'batch_loss/train': 0.6605602707713842} +12/28/2021 22:46:54 - INFO - codeparrot_training - Step 45452: {'lr': 1.0208875386483268e-05, 'samples': 23271936, 'steps': 45452, 'batch_loss/train': 0.8594098784960806} +12/28/2021 22:47:04 - INFO - codeparrot_training - Step 45453: {'lr': 1.0204417278950328e-05, 'samples': 23272448, 'steps': 45453, 'batch_loss/train': 0.8014468904584646} +12/28/2021 22:47:16 - INFO - codeparrot_training - Step 45454: {'lr': 1.0199960124754104e-05, 'samples': 23272960, 'steps': 45454, 'batch_loss/train': 0.5376245186198503} +12/28/2021 22:47:27 - INFO - codeparrot_training - Step 45455: {'lr': 1.0195503923912303e-05, 'samples': 23273472, 'steps': 45455, 'batch_loss/train': 0.6523260503308848} +12/28/2021 22:47:38 - INFO - codeparrot_training - Step 45456: {'lr': 1.0191048676442605e-05, 'samples': 23273984, 'steps': 45456, 'batch_loss/train': 0.7949811797589064} +12/28/2021 22:47:51 - INFO - codeparrot_training - Step 45457: {'lr': 1.0186594382362802e-05, 'samples': 23274496, 'steps': 45457, 'batch_loss/train': 0.7885586069896817} +12/28/2021 22:48:02 - INFO - codeparrot_training - Step 45458: {'lr': 1.0182141041690519e-05, 'samples': 23275008, 'steps': 45458, 'batch_loss/train': 0.712237901519984} +12/28/2021 22:48:13 - INFO - codeparrot_training - Step 45459: {'lr': 1.0177688654443462e-05, 'samples': 23275520, 'steps': 45459, 'batch_loss/train': 0.685056199785322} +12/28/2021 22:48:23 - INFO - codeparrot_training - Step 45460: {'lr': 1.0173237220639398e-05, 'samples': 23276032, 'steps': 45460, 'batch_loss/train': 0.7505501061677933} +12/28/2021 22:48:36 - INFO - codeparrot_training - Step 45461: {'lr': 1.0168786740295976e-05, 'samples': 23276544, 'steps': 45461, 'batch_loss/train': 0.6024831961840391} +12/28/2021 22:48:46 - INFO - codeparrot_training - Step 45462: {'lr': 1.0164337213430908e-05, 'samples': 23277056, 'steps': 45462, 'batch_loss/train': 0.7912550941109657} +12/28/2021 22:48:57 - INFO - codeparrot_training - Step 45463: {'lr': 1.0159888640061871e-05, 'samples': 23277568, 'steps': 45463, 'batch_loss/train': 0.6827211594209075} +12/28/2021 22:49:09 - INFO - codeparrot_training - Step 45464: {'lr': 1.0155441020206547e-05, 'samples': 23278080, 'steps': 45464, 'batch_loss/train': 0.7907172059640288} +12/28/2021 22:49:19 - INFO - codeparrot_training - Step 45465: {'lr': 1.0150994353882642e-05, 'samples': 23278592, 'steps': 45465, 'batch_loss/train': 0.7264383211731911} +12/28/2021 22:49:30 - INFO - codeparrot_training - Step 45466: {'lr': 1.0146548641107811e-05, 'samples': 23279104, 'steps': 45466, 'batch_loss/train': 0.6673216689378023} +12/28/2021 22:49:44 - INFO - codeparrot_training - Step 45467: {'lr': 1.0142103881899734e-05, 'samples': 23279616, 'steps': 45467, 'batch_loss/train': 0.6635509072802961} +12/28/2021 22:49:55 - INFO - codeparrot_training - Step 45468: {'lr': 1.013766007627609e-05, 'samples': 23280128, 'steps': 45468, 'batch_loss/train': 0.7011138265952468} +12/28/2021 22:50:06 - INFO - codeparrot_training - Step 45469: {'lr': 1.0133217224254588e-05, 'samples': 23280640, 'steps': 45469, 'batch_loss/train': 0.724808621685952} +12/28/2021 22:50:16 - INFO - codeparrot_training - Step 45470: {'lr': 1.0128775325852769e-05, 'samples': 23281152, 'steps': 45470, 'batch_loss/train': 0.7891314532607794} +12/28/2021 22:50:28 - INFO - codeparrot_training - Step 45471: {'lr': 1.0124334381088395e-05, 'samples': 23281664, 'steps': 45471, 'batch_loss/train': 0.8664060514420271} +12/28/2021 22:50:39 - INFO - codeparrot_training - Step 45472: {'lr': 1.0119894389979124e-05, 'samples': 23282176, 'steps': 45472, 'batch_loss/train': 0.684996189083904} +12/28/2021 22:50:49 - INFO - codeparrot_training - Step 45473: {'lr': 1.0115455352542546e-05, 'samples': 23282688, 'steps': 45473, 'batch_loss/train': 0.7540377981495112} +12/28/2021 22:51:02 - INFO - codeparrot_training - Step 45474: {'lr': 1.011101726879629e-05, 'samples': 23283200, 'steps': 45474, 'batch_loss/train': 0.7005619881674647} +12/28/2021 22:51:12 - INFO - codeparrot_training - Step 45475: {'lr': 1.0106580138758148e-05, 'samples': 23283712, 'steps': 45475, 'batch_loss/train': 0.6136932764202356} +12/28/2021 22:51:23 - INFO - codeparrot_training - Step 45476: {'lr': 1.0102143962445632e-05, 'samples': 23284224, 'steps': 45476, 'batch_loss/train': 0.7534158984199166} +12/28/2021 22:51:37 - INFO - codeparrot_training - Step 45477: {'lr': 1.0097708739876393e-05, 'samples': 23284736, 'steps': 45477, 'batch_loss/train': 0.6736611728556454} +12/28/2021 22:51:47 - INFO - codeparrot_training - Step 45478: {'lr': 1.0093274471068086e-05, 'samples': 23285248, 'steps': 45478, 'batch_loss/train': 0.6697295429185033} +12/28/2021 22:51:58 - INFO - codeparrot_training - Step 45479: {'lr': 1.0088841156038337e-05, 'samples': 23285760, 'steps': 45479, 'batch_loss/train': 0.5048873771447688} +12/28/2021 22:52:10 - INFO - codeparrot_training - Step 45480: {'lr': 1.008440879480474e-05, 'samples': 23286272, 'steps': 45480, 'batch_loss/train': 0.7290785266086459} +12/28/2021 22:52:21 - INFO - codeparrot_training - Step 45481: {'lr': 1.0079977387384976e-05, 'samples': 23286784, 'steps': 45481, 'batch_loss/train': 0.668680329923518} +12/28/2021 22:52:31 - INFO - codeparrot_training - Step 45482: {'lr': 1.0075546933796642e-05, 'samples': 23287296, 'steps': 45482, 'batch_loss/train': 0.718552746810019} +12/28/2021 22:52:42 - INFO - codeparrot_training - Step 45483: {'lr': 1.0071117434057308e-05, 'samples': 23287808, 'steps': 45483, 'batch_loss/train': 0.6637555789202452} +12/28/2021 22:52:56 - INFO - codeparrot_training - Step 45484: {'lr': 1.0066688888184656e-05, 'samples': 23288320, 'steps': 45484, 'batch_loss/train': 0.7618849165737629} +12/28/2021 22:53:07 - INFO - codeparrot_training - Step 45485: {'lr': 1.0062261296196224e-05, 'samples': 23288832, 'steps': 45485, 'batch_loss/train': 0.642767951823771} +12/28/2021 22:53:17 - INFO - codeparrot_training - Step 45486: {'lr': 1.0057834658109665e-05, 'samples': 23289344, 'steps': 45486, 'batch_loss/train': 0.678557604085654} +12/28/2021 22:53:29 - INFO - codeparrot_training - Step 45487: {'lr': 1.0053408973942552e-05, 'samples': 23289856, 'steps': 45487, 'batch_loss/train': 0.7388386139646173} +12/28/2021 22:53:40 - INFO - codeparrot_training - Step 45488: {'lr': 1.0048984243712478e-05, 'samples': 23290368, 'steps': 45488, 'batch_loss/train': 0.6861914866603911} +12/28/2021 22:53:51 - INFO - codeparrot_training - Step 45489: {'lr': 1.0044560467437042e-05, 'samples': 23290880, 'steps': 45489, 'batch_loss/train': 0.7232690728269517} +12/28/2021 22:54:03 - INFO - codeparrot_training - Step 45490: {'lr': 1.0040137645133812e-05, 'samples': 23291392, 'steps': 45490, 'batch_loss/train': 0.6866155387833714} +12/28/2021 22:54:14 - INFO - codeparrot_training - Step 45491: {'lr': 1.0035715776820415e-05, 'samples': 23291904, 'steps': 45491, 'batch_loss/train': 0.6898050936870277} +12/28/2021 22:54:24 - INFO - codeparrot_training - Step 45492: {'lr': 1.0031294862514389e-05, 'samples': 23292416, 'steps': 45492, 'batch_loss/train': 0.6065362724475563} +12/28/2021 22:54:35 - INFO - codeparrot_training - Step 45493: {'lr': 1.0026874902233335e-05, 'samples': 23292928, 'steps': 45493, 'batch_loss/train': 0.6946635388303548} +12/28/2021 22:54:47 - INFO - codeparrot_training - Step 45494: {'lr': 1.0022455895994819e-05, 'samples': 23293440, 'steps': 45494, 'batch_loss/train': 0.5808107976336032} +12/28/2021 22:54:58 - INFO - codeparrot_training - Step 45495: {'lr': 1.0018037843816413e-05, 'samples': 23293952, 'steps': 45495, 'batch_loss/train': 0.6844837171956897} +12/28/2021 22:55:08 - INFO - codeparrot_training - Step 45496: {'lr': 1.001362074571563e-05, 'samples': 23294464, 'steps': 45496, 'batch_loss/train': 0.5810388496611267} +12/28/2021 22:55:22 - INFO - codeparrot_training - Step 45497: {'lr': 1.000920460171012e-05, 'samples': 23294976, 'steps': 45497, 'batch_loss/train': 0.6670199800282717} +12/28/2021 22:55:33 - INFO - codeparrot_training - Step 45498: {'lr': 1.00047894118174e-05, 'samples': 23295488, 'steps': 45498, 'batch_loss/train': 0.683261682454031} +12/28/2021 22:55:43 - INFO - codeparrot_training - Step 45499: {'lr': 1.000037517605501e-05, 'samples': 23296000, 'steps': 45499, 'batch_loss/train': 0.6006805319339037} +12/28/2021 22:55:56 - INFO - codeparrot_training - Step 45500: {'lr': 9.995961894440464e-06, 'samples': 23296512, 'steps': 45500, 'batch_loss/train': 0.7023093798197806} +12/28/2021 22:56:06 - INFO - codeparrot_training - Step 45501: {'lr': 9.991549566991415e-06, 'samples': 23297024, 'steps': 45501, 'batch_loss/train': 0.5977705442346632} +12/28/2021 22:56:17 - INFO - codeparrot_training - Step 45502: {'lr': 9.98713819372532e-06, 'samples': 23297536, 'steps': 45502, 'batch_loss/train': 0.7653553104028106} +12/28/2021 22:56:29 - INFO - codeparrot_training - Step 45503: {'lr': 9.982727774659722e-06, 'samples': 23298048, 'steps': 45503, 'batch_loss/train': 0.7220573916565627} +12/28/2021 22:56:40 - INFO - codeparrot_training - Step 45504: {'lr': 9.97831830981219e-06, 'samples': 23298560, 'steps': 45504, 'batch_loss/train': 0.7300571193918586} +12/28/2021 22:56:50 - INFO - codeparrot_training - Step 45505: {'lr': 9.973909799200237e-06, 'samples': 23299072, 'steps': 45505, 'batch_loss/train': 0.7879718937911093} +12/28/2021 22:57:01 - INFO - codeparrot_training - Step 45506: {'lr': 9.969502242841377e-06, 'samples': 23299584, 'steps': 45506, 'batch_loss/train': 0.7479194137267768} +12/28/2021 22:57:15 - INFO - codeparrot_training - Step 45507: {'lr': 9.965095640753125e-06, 'samples': 23300096, 'steps': 45507, 'batch_loss/train': 0.7883351603522897} +12/28/2021 22:57:25 - INFO - codeparrot_training - Step 45508: {'lr': 9.960689992953049e-06, 'samples': 23300608, 'steps': 45508, 'batch_loss/train': 0.7467877026647329} +12/28/2021 22:57:36 - INFO - codeparrot_training - Step 45509: {'lr': 9.956285299458606e-06, 'samples': 23301120, 'steps': 45509, 'batch_loss/train': 0.7029042076319456} +12/28/2021 22:57:48 - INFO - codeparrot_training - Step 45510: {'lr': 9.951881560287395e-06, 'samples': 23301632, 'steps': 45510, 'batch_loss/train': 0.6696406435221434} +12/28/2021 22:57:59 - INFO - codeparrot_training - Step 45511: {'lr': 9.947478775456792e-06, 'samples': 23302144, 'steps': 45511, 'batch_loss/train': 0.6876383516937494} +12/28/2021 22:58:09 - INFO - codeparrot_training - Step 45512: {'lr': 9.943076944984448e-06, 'samples': 23302656, 'steps': 45512, 'batch_loss/train': 0.8769966941326857} +12/28/2021 22:58:23 - INFO - codeparrot_training - Step 45513: {'lr': 9.938676068887736e-06, 'samples': 23303168, 'steps': 45513, 'batch_loss/train': 0.6930886174086481} +12/28/2021 22:58:34 - INFO - codeparrot_training - Step 45514: {'lr': 9.9342761471842e-06, 'samples': 23303680, 'steps': 45514, 'batch_loss/train': 0.4847627100534737} +12/28/2021 22:58:45 - INFO - codeparrot_training - Step 45515: {'lr': 9.929877179891356e-06, 'samples': 23304192, 'steps': 45515, 'batch_loss/train': 0.7519457284361124} +12/28/2021 22:58:55 - INFO - codeparrot_training - Step 45516: {'lr': 9.925479167026658e-06, 'samples': 23304704, 'steps': 45516, 'batch_loss/train': 0.6979496772401035} +12/28/2021 22:59:07 - INFO - codeparrot_training - Step 45517: {'lr': 9.921082108607621e-06, 'samples': 23305216, 'steps': 45517, 'batch_loss/train': 0.6892920737154782} +12/28/2021 22:59:18 - INFO - codeparrot_training - Step 45518: {'lr': 9.916686004651705e-06, 'samples': 23305728, 'steps': 45518, 'batch_loss/train': 0.7359628556296229} +12/28/2021 22:59:29 - INFO - codeparrot_training - Step 45519: {'lr': 9.912290855176392e-06, 'samples': 23306240, 'steps': 45519, 'batch_loss/train': 0.7794537162408233} +12/28/2021 22:59:41 - INFO - codeparrot_training - Step 45520: {'lr': 9.907896660199173e-06, 'samples': 23306752, 'steps': 45520, 'batch_loss/train': 0.7599857272580266} +12/28/2021 22:59:52 - INFO - codeparrot_training - Step 45521: {'lr': 9.903503419737503e-06, 'samples': 23307264, 'steps': 45521, 'batch_loss/train': 0.789836535230279} +12/28/2021 23:00:02 - INFO - codeparrot_training - Step 45522: {'lr': 9.899111133808813e-06, 'samples': 23307776, 'steps': 45522, 'batch_loss/train': 0.6907774759456515} +12/28/2021 23:00:16 - INFO - codeparrot_training - Step 45523: {'lr': 9.894719802430619e-06, 'samples': 23308288, 'steps': 45523, 'batch_loss/train': 0.6190168666653335} +12/28/2021 23:00:27 - INFO - codeparrot_training - Step 45524: {'lr': 9.890329425620377e-06, 'samples': 23308800, 'steps': 45524, 'batch_loss/train': 0.6864514751941897} +12/28/2021 23:00:37 - INFO - codeparrot_training - Step 45525: {'lr': 9.885940003395488e-06, 'samples': 23309312, 'steps': 45525, 'batch_loss/train': 0.7505368441343307} +12/28/2021 23:00:49 - INFO - codeparrot_training - Step 45526: {'lr': 9.881551535773441e-06, 'samples': 23309824, 'steps': 45526, 'batch_loss/train': 0.7795694340020418} +12/28/2021 23:01:00 - INFO - codeparrot_training - Step 45527: {'lr': 9.877164022771723e-06, 'samples': 23310336, 'steps': 45527, 'batch_loss/train': 0.6768879345618188} +12/28/2021 23:01:11 - INFO - codeparrot_training - Step 45528: {'lr': 9.872777464407706e-06, 'samples': 23310848, 'steps': 45528, 'batch_loss/train': 0.7058514383388683} +12/28/2021 23:01:21 - INFO - codeparrot_training - Step 45529: {'lr': 9.868391860698822e-06, 'samples': 23311360, 'steps': 45529, 'batch_loss/train': 0.727812509983778} +12/28/2021 23:01:34 - INFO - codeparrot_training - Step 45530: {'lr': 9.864007211662585e-06, 'samples': 23311872, 'steps': 45530, 'batch_loss/train': 0.7431049030274153} +12/28/2021 23:01:44 - INFO - codeparrot_training - Step 45531: {'lr': 9.859623517316368e-06, 'samples': 23312384, 'steps': 45531, 'batch_loss/train': 0.7349691314157099} +12/28/2021 23:01:55 - INFO - codeparrot_training - Step 45532: {'lr': 9.855240777677632e-06, 'samples': 23312896, 'steps': 45532, 'batch_loss/train': 0.6816644514910877} +12/28/2021 23:02:07 - INFO - codeparrot_training - Step 45533: {'lr': 9.85085899276375e-06, 'samples': 23313408, 'steps': 45533, 'batch_loss/train': 0.8163027800619602} +12/28/2021 23:02:18 - INFO - codeparrot_training - Step 45534: {'lr': 9.846478162592209e-06, 'samples': 23313920, 'steps': 45534, 'batch_loss/train': 0.6984525746665895} +12/28/2021 23:02:28 - INFO - codeparrot_training - Step 45535: {'lr': 9.842098287180385e-06, 'samples': 23314432, 'steps': 45535, 'batch_loss/train': 0.7291670325212181} +12/28/2021 23:02:42 - INFO - codeparrot_training - Step 45536: {'lr': 9.837719366545706e-06, 'samples': 23314944, 'steps': 45536, 'batch_loss/train': 0.7341048792004585} +12/28/2021 23:02:53 - INFO - codeparrot_training - Step 45537: {'lr': 9.833341400705548e-06, 'samples': 23315456, 'steps': 45537, 'batch_loss/train': 0.6475710591766983} +12/28/2021 23:03:04 - INFO - codeparrot_training - Step 45538: {'lr': 9.82896438967737e-06, 'samples': 23315968, 'steps': 45538, 'batch_loss/train': 0.752103746519424} +12/28/2021 23:03:14 - INFO - codeparrot_training - Step 45539: {'lr': 9.824588333478518e-06, 'samples': 23316480, 'steps': 45539, 'batch_loss/train': 0.7246462611947209} +12/28/2021 23:03:26 - INFO - codeparrot_training - Step 45540: {'lr': 9.820213232126396e-06, 'samples': 23316992, 'steps': 45540, 'batch_loss/train': 0.7982499664649367} +12/28/2021 23:03:37 - INFO - codeparrot_training - Step 45541: {'lr': 9.815839085638489e-06, 'samples': 23317504, 'steps': 45541, 'batch_loss/train': 0.6693907547742128} +12/28/2021 23:03:48 - INFO - codeparrot_training - Step 45542: {'lr': 9.811465894032063e-06, 'samples': 23318016, 'steps': 45542, 'batch_loss/train': 0.7471036617644131} +12/28/2021 23:04:01 - INFO - codeparrot_training - Step 45543: {'lr': 9.807093657324573e-06, 'samples': 23318528, 'steps': 45543, 'batch_loss/train': 0.7052305545657873} +12/28/2021 23:04:12 - INFO - codeparrot_training - Step 45544: {'lr': 9.802722375533369e-06, 'samples': 23319040, 'steps': 45544, 'batch_loss/train': 0.6898606307804585} +12/28/2021 23:04:23 - INFO - codeparrot_training - Step 45545: {'lr': 9.79835204867588e-06, 'samples': 23319552, 'steps': 45545, 'batch_loss/train': 0.6797647168859839} +12/28/2021 23:04:35 - INFO - codeparrot_training - Step 45546: {'lr': 9.793982676769398e-06, 'samples': 23320064, 'steps': 45546, 'batch_loss/train': 0.5720435864059255} +12/28/2021 23:04:45 - INFO - codeparrot_training - Step 45547: {'lr': 9.789614259831409e-06, 'samples': 23320576, 'steps': 45547, 'batch_loss/train': 0.6984084469731897} +12/28/2021 23:04:56 - INFO - codeparrot_training - Step 45548: {'lr': 9.78524679787915e-06, 'samples': 23321088, 'steps': 45548, 'batch_loss/train': 0.837005041539669} +12/28/2021 23:05:07 - INFO - codeparrot_training - Step 45549: {'lr': 9.780880290930078e-06, 'samples': 23321600, 'steps': 45549, 'batch_loss/train': 0.6553143444471061} +12/28/2021 23:05:19 - INFO - codeparrot_training - Step 45550: {'lr': 9.77651473900154e-06, 'samples': 23322112, 'steps': 45550, 'batch_loss/train': 0.6965626794844866} +12/28/2021 23:05:29 - INFO - codeparrot_training - Step 45551: {'lr': 9.77215014211083e-06, 'samples': 23322624, 'steps': 45551, 'batch_loss/train': 0.6569881543400697} +12/28/2021 23:05:40 - INFO - codeparrot_training - Step 45552: {'lr': 9.76778650027535e-06, 'samples': 23323136, 'steps': 45552, 'batch_loss/train': 0.6927640987560153} +12/28/2021 23:05:54 - INFO - codeparrot_training - Step 45553: {'lr': 9.763423813512474e-06, 'samples': 23323648, 'steps': 45553, 'batch_loss/train': 0.7228050292469561} +12/28/2021 23:06:04 - INFO - codeparrot_training - Step 45554: {'lr': 9.759062081839521e-06, 'samples': 23324160, 'steps': 45554, 'batch_loss/train': 0.6875400384888053} +12/28/2021 23:06:15 - INFO - codeparrot_training - Step 45555: {'lr': 9.754701305273755e-06, 'samples': 23324672, 'steps': 45555, 'batch_loss/train': 0.6838415465317667} +12/28/2021 23:06:27 - INFO - codeparrot_training - Step 45556: {'lr': 9.750341483832664e-06, 'samples': 23325184, 'steps': 45556, 'batch_loss/train': 0.6506529079051688} +12/28/2021 23:06:38 - INFO - codeparrot_training - Step 45557: {'lr': 9.745982617533455e-06, 'samples': 23325696, 'steps': 45557, 'batch_loss/train': 0.7581533886259422} +12/28/2021 23:06:49 - INFO - codeparrot_training - Step 45558: {'lr': 9.741624706393532e-06, 'samples': 23326208, 'steps': 45558, 'batch_loss/train': 0.7722725071944296} +12/28/2021 23:07:00 - INFO - codeparrot_training - Step 45559: {'lr': 9.737267750430184e-06, 'samples': 23326720, 'steps': 45559, 'batch_loss/train': 0.6869260161183774} +12/28/2021 23:07:11 - INFO - codeparrot_training - Step 45560: {'lr': 9.732911749660706e-06, 'samples': 23327232, 'steps': 45560, 'batch_loss/train': 0.6747195445932448} +12/28/2021 23:07:22 - INFO - codeparrot_training - Step 45561: {'lr': 9.728556704102499e-06, 'samples': 23327744, 'steps': 45561, 'batch_loss/train': 0.7880747229792178} +12/28/2021 23:07:32 - INFO - codeparrot_training - Step 45562: {'lr': 9.724202613772826e-06, 'samples': 23328256, 'steps': 45562, 'batch_loss/train': 0.6612762766890228} +12/28/2021 23:07:47 - INFO - codeparrot_training - Step 45563: {'lr': 9.719849478688924e-06, 'samples': 23328768, 'steps': 45563, 'batch_loss/train': 0.6562710027210414} +12/28/2021 23:07:57 - INFO - codeparrot_training - Step 45564: {'lr': 9.715497298868253e-06, 'samples': 23329280, 'steps': 45564, 'batch_loss/train': 0.6951845221919939} +12/28/2021 23:08:08 - INFO - codeparrot_training - Step 45565: {'lr': 9.71114607432802e-06, 'samples': 23329792, 'steps': 45565, 'batch_loss/train': 0.6834589773206972} +12/28/2021 23:08:20 - INFO - codeparrot_training - Step 45566: {'lr': 9.706795805085517e-06, 'samples': 23330304, 'steps': 45566, 'batch_loss/train': 0.7661949424655177} +12/28/2021 23:08:31 - INFO - codeparrot_training - Step 45567: {'lr': 9.70244649115809e-06, 'samples': 23330816, 'steps': 45567, 'batch_loss/train': 0.6713146381080151} +12/28/2021 23:08:41 - INFO - codeparrot_training - Step 45568: {'lr': 9.698098132563005e-06, 'samples': 23331328, 'steps': 45568, 'batch_loss/train': 0.7176468030083925} +12/28/2021 23:08:54 - INFO - codeparrot_training - Step 45569: {'lr': 9.693750729317524e-06, 'samples': 23331840, 'steps': 45569, 'batch_loss/train': 0.6570214661769569} +12/28/2021 23:09:05 - INFO - codeparrot_training - Step 45570: {'lr': 9.689404281438969e-06, 'samples': 23332352, 'steps': 45570, 'batch_loss/train': 0.6609097048640251} +12/28/2021 23:09:15 - INFO - codeparrot_training - Step 45571: {'lr': 9.685058788944573e-06, 'samples': 23332864, 'steps': 45571, 'batch_loss/train': 0.7261445648036897} +12/28/2021 23:09:26 - INFO - codeparrot_training - Step 45572: {'lr': 9.680714251851686e-06, 'samples': 23333376, 'steps': 45572, 'batch_loss/train': 0.7181580872274935} +12/28/2021 23:09:39 - INFO - codeparrot_training - Step 45573: {'lr': 9.676370670177515e-06, 'samples': 23333888, 'steps': 45573, 'batch_loss/train': 0.9232547031715512} +12/28/2021 23:09:50 - INFO - codeparrot_training - Step 45574: {'lr': 9.672028043939351e-06, 'samples': 23334400, 'steps': 45574, 'batch_loss/train': 0.6682717697694898} +12/28/2021 23:10:00 - INFO - codeparrot_training - Step 45575: {'lr': 9.66768637315446e-06, 'samples': 23334912, 'steps': 45575, 'batch_loss/train': 0.6588187073357403} +12/28/2021 23:10:12 - INFO - codeparrot_training - Step 45576: {'lr': 9.663345657840133e-06, 'samples': 23335424, 'steps': 45576, 'batch_loss/train': 0.7104833265766501} +12/28/2021 23:10:23 - INFO - codeparrot_training - Step 45577: {'lr': 9.65900589801355e-06, 'samples': 23335936, 'steps': 45577, 'batch_loss/train': 0.7809540778398514} +12/28/2021 23:10:34 - INFO - codeparrot_training - Step 45578: {'lr': 9.654667093692033e-06, 'samples': 23336448, 'steps': 45578, 'batch_loss/train': 0.9215349843725562} +12/28/2021 23:10:47 - INFO - codeparrot_training - Step 45579: {'lr': 9.650329244892842e-06, 'samples': 23336960, 'steps': 45579, 'batch_loss/train': 0.7398596131242812} +12/28/2021 23:10:57 - INFO - codeparrot_training - Step 45580: {'lr': 9.645992351633131e-06, 'samples': 23337472, 'steps': 45580, 'batch_loss/train': 0.7487538361456245} +12/28/2021 23:11:08 - INFO - codeparrot_training - Step 45581: {'lr': 9.641656413930194e-06, 'samples': 23337984, 'steps': 45581, 'batch_loss/train': 1.0956158265471458} +12/28/2021 23:11:19 - INFO - codeparrot_training - Step 45582: {'lr': 9.63732143180135e-06, 'samples': 23338496, 'steps': 45582, 'batch_loss/train': 0.826207589590922} +12/28/2021 23:11:32 - INFO - codeparrot_training - Step 45583: {'lr': 9.632987405263693e-06, 'samples': 23339008, 'steps': 45583, 'batch_loss/train': 0.703369417693466} +12/28/2021 23:11:43 - INFO - codeparrot_training - Step 45584: {'lr': 9.628654334334547e-06, 'samples': 23339520, 'steps': 45584, 'batch_loss/train': 0.6735156625509262} +12/28/2021 23:11:54 - INFO - codeparrot_training - Step 45585: {'lr': 9.624322219031118e-06, 'samples': 23340032, 'steps': 45585, 'batch_loss/train': 0.7027934701181948} +12/28/2021 23:12:06 - INFO - codeparrot_training - Step 45586: {'lr': 9.619991059370587e-06, 'samples': 23340544, 'steps': 45586, 'batch_loss/train': 0.7368344059213996} +12/28/2021 23:12:17 - INFO - codeparrot_training - Step 45587: {'lr': 9.615660855370246e-06, 'samples': 23341056, 'steps': 45587, 'batch_loss/train': 0.7069672015495598} +12/28/2021 23:12:27 - INFO - codeparrot_training - Step 45588: {'lr': 9.611331607047275e-06, 'samples': 23341568, 'steps': 45588, 'batch_loss/train': 0.7092901836149395} +12/28/2021 23:12:38 - INFO - codeparrot_training - Step 45589: {'lr': 9.607003314418828e-06, 'samples': 23342080, 'steps': 45589, 'batch_loss/train': 0.709690093062818} +12/28/2021 23:12:50 - INFO - codeparrot_training - Step 45590: {'lr': 9.602675977502195e-06, 'samples': 23342592, 'steps': 45590, 'batch_loss/train': 0.6534809279255569} +12/28/2021 23:13:00 - INFO - codeparrot_training - Step 45591: {'lr': 9.598349596314588e-06, 'samples': 23343104, 'steps': 45591, 'batch_loss/train': 0.6775838428875431} +12/28/2021 23:13:11 - INFO - codeparrot_training - Step 45592: {'lr': 9.5940241708731e-06, 'samples': 23343616, 'steps': 45592, 'batch_loss/train': 0.6050346570555121} +12/28/2021 23:13:25 - INFO - codeparrot_training - Step 45593: {'lr': 9.589699701195054e-06, 'samples': 23344128, 'steps': 45593, 'batch_loss/train': 0.6453629713505507} +12/28/2021 23:13:36 - INFO - codeparrot_training - Step 45594: {'lr': 9.585376187297573e-06, 'samples': 23344640, 'steps': 45594, 'batch_loss/train': 0.6578679555095732} +12/28/2021 23:13:46 - INFO - codeparrot_training - Step 45595: {'lr': 9.581053629197866e-06, 'samples': 23345152, 'steps': 45595, 'batch_loss/train': 0.6465652892366052} +12/28/2021 23:13:58 - INFO - codeparrot_training - Step 45596: {'lr': 9.576732026913088e-06, 'samples': 23345664, 'steps': 45596, 'batch_loss/train': 0.6881764810532331} +12/28/2021 23:14:09 - INFO - codeparrot_training - Step 45597: {'lr': 9.572411380460471e-06, 'samples': 23346176, 'steps': 45597, 'batch_loss/train': 0.841520281508565} +12/28/2021 23:14:20 - INFO - codeparrot_training - Step 45598: {'lr': 9.568091689857145e-06, 'samples': 23346688, 'steps': 45598, 'batch_loss/train': 0.8005507998168468} +12/28/2021 23:14:34 - INFO - codeparrot_training - Step 45599: {'lr': 9.563772955120314e-06, 'samples': 23347200, 'steps': 45599, 'batch_loss/train': 0.6975928822648712} +12/28/2021 23:14:44 - INFO - codeparrot_training - Step 45600: {'lr': 9.559455176267106e-06, 'samples': 23347712, 'steps': 45600, 'batch_loss/train': 0.7531780106946826} +12/28/2021 23:14:55 - INFO - codeparrot_training - Step 45601: {'lr': 9.555138353314757e-06, 'samples': 23348224, 'steps': 45601, 'batch_loss/train': 0.661419422365725} +12/28/2021 23:15:06 - INFO - codeparrot_training - Step 45602: {'lr': 9.550822486280392e-06, 'samples': 23348736, 'steps': 45602, 'batch_loss/train': 0.6822636825963855} +12/28/2021 23:15:18 - INFO - codeparrot_training - Step 45603: {'lr': 9.546507575181108e-06, 'samples': 23349248, 'steps': 45603, 'batch_loss/train': 0.9452994496095926} +12/28/2021 23:15:29 - INFO - codeparrot_training - Step 45604: {'lr': 9.542193620034168e-06, 'samples': 23349760, 'steps': 45604, 'batch_loss/train': 0.6860243730479851} +12/28/2021 23:15:39 - INFO - codeparrot_training - Step 45605: {'lr': 9.537880620856698e-06, 'samples': 23350272, 'steps': 45605, 'batch_loss/train': 0.7597313541918993} +12/28/2021 23:15:51 - INFO - codeparrot_training - Step 45606: {'lr': 9.533568577665769e-06, 'samples': 23350784, 'steps': 45606, 'batch_loss/train': 0.7347802193835378} +12/28/2021 23:16:02 - INFO - codeparrot_training - Step 45607: {'lr': 9.52925749047856e-06, 'samples': 23351296, 'steps': 45607, 'batch_loss/train': 0.6418961966410279} +12/28/2021 23:16:13 - INFO - codeparrot_training - Step 45608: {'lr': 9.524947359312307e-06, 'samples': 23351808, 'steps': 45608, 'batch_loss/train': 0.8139610085636377} +12/28/2021 23:16:25 - INFO - codeparrot_training - Step 45609: {'lr': 9.520638184184e-06, 'samples': 23352320, 'steps': 45609, 'batch_loss/train': 0.6918171914294362} +12/28/2021 23:16:35 - INFO - codeparrot_training - Step 45610: {'lr': 9.516329965110843e-06, 'samples': 23352832, 'steps': 45610, 'batch_loss/train': 0.5782277407415677} +12/28/2021 23:16:46 - INFO - codeparrot_training - Step 45611: {'lr': 9.512022702109962e-06, 'samples': 23353344, 'steps': 45611, 'batch_loss/train': 0.6072481739684008} +12/28/2021 23:17:01 - INFO - codeparrot_training - Step 45612: {'lr': 9.507716395198457e-06, 'samples': 23353856, 'steps': 45612, 'batch_loss/train': 0.6238124272786081} +12/28/2021 23:17:12 - INFO - codeparrot_training - Step 45613: {'lr': 9.50341104439348e-06, 'samples': 23354368, 'steps': 45613, 'batch_loss/train': 0.6209913083002903} +12/28/2021 23:17:22 - INFO - codeparrot_training - Step 45614: {'lr': 9.499106649712124e-06, 'samples': 23354880, 'steps': 45614, 'batch_loss/train': 0.725394893437624} +12/28/2021 23:17:33 - INFO - codeparrot_training - Step 45615: {'lr': 9.494803211171522e-06, 'samples': 23355392, 'steps': 45615, 'batch_loss/train': 0.7576756123453379} +12/28/2021 23:17:45 - INFO - codeparrot_training - Step 45616: {'lr': 9.490500728788764e-06, 'samples': 23355904, 'steps': 45616, 'batch_loss/train': 0.6423772820271552} +12/28/2021 23:17:56 - INFO - codeparrot_training - Step 45617: {'lr': 9.486199202581009e-06, 'samples': 23356416, 'steps': 45617, 'batch_loss/train': 0.7412867750972509} +12/28/2021 23:18:06 - INFO - codeparrot_training - Step 45618: {'lr': 9.48189863256524e-06, 'samples': 23356928, 'steps': 45618, 'batch_loss/train': 0.7458846243098378} +12/28/2021 23:18:20 - INFO - codeparrot_training - Step 45619: {'lr': 9.477599018758637e-06, 'samples': 23357440, 'steps': 45619, 'batch_loss/train': 0.6256932428805158} +12/28/2021 23:18:31 - INFO - codeparrot_training - Step 45620: {'lr': 9.473300361178328e-06, 'samples': 23357952, 'steps': 45620, 'batch_loss/train': 0.7405675202608109} +12/28/2021 23:18:41 - INFO - codeparrot_training - Step 45621: {'lr': 9.469002659841325e-06, 'samples': 23358464, 'steps': 45621, 'batch_loss/train': 0.6912796157412231} +12/28/2021 23:18:54 - INFO - codeparrot_training - Step 45622: {'lr': 9.464705914764754e-06, 'samples': 23358976, 'steps': 45622, 'batch_loss/train': 0.6201828697812743} +12/28/2021 23:19:05 - INFO - codeparrot_training - Step 45623: {'lr': 9.460410125965686e-06, 'samples': 23359488, 'steps': 45623, 'batch_loss/train': 0.7391825839877129} +12/28/2021 23:19:15 - INFO - codeparrot_training - Step 45624: {'lr': 9.456115293461187e-06, 'samples': 23360000, 'steps': 45624, 'batch_loss/train': 0.7608282845467329} +12/28/2021 23:19:26 - INFO - codeparrot_training - Step 45625: {'lr': 9.451821417268387e-06, 'samples': 23360512, 'steps': 45625, 'batch_loss/train': 0.7373949401080608} +12/28/2021 23:19:38 - INFO - codeparrot_training - Step 45626: {'lr': 9.447528497404295e-06, 'samples': 23361024, 'steps': 45626, 'batch_loss/train': 0.999160663690418} +12/28/2021 23:19:49 - INFO - codeparrot_training - Step 45627: {'lr': 9.443236533885985e-06, 'samples': 23361536, 'steps': 45627, 'batch_loss/train': 0.7219428713433444} +12/28/2021 23:19:59 - INFO - codeparrot_training - Step 45628: {'lr': 9.438945526730581e-06, 'samples': 23362048, 'steps': 45628, 'batch_loss/train': 0.7007676232606173} +12/28/2021 23:20:13 - INFO - codeparrot_training - Step 45629: {'lr': 9.43465547595504e-06, 'samples': 23362560, 'steps': 45629, 'batch_loss/train': 0.6758406511507928} +12/28/2021 23:20:24 - INFO - codeparrot_training - Step 45630: {'lr': 9.430366381576488e-06, 'samples': 23363072, 'steps': 45630, 'batch_loss/train': 0.6485789329744875} +12/28/2021 23:20:35 - INFO - codeparrot_training - Step 45631: {'lr': 9.426078243612025e-06, 'samples': 23363584, 'steps': 45631, 'batch_loss/train': 0.6913343900814652} +12/28/2021 23:20:47 - INFO - codeparrot_training - Step 45632: {'lr': 9.421791062078578e-06, 'samples': 23364096, 'steps': 45632, 'batch_loss/train': 0.660549582913518} +12/28/2021 23:20:57 - INFO - codeparrot_training - Step 45633: {'lr': 9.417504836993218e-06, 'samples': 23364608, 'steps': 45633, 'batch_loss/train': 0.7324497466906905} +12/28/2021 23:21:08 - INFO - codeparrot_training - Step 45634: {'lr': 9.413219568373099e-06, 'samples': 23365120, 'steps': 45634, 'batch_loss/train': 0.6581174959428608} +12/28/2021 23:21:20 - INFO - codeparrot_training - Step 45635: {'lr': 9.408935256235152e-06, 'samples': 23365632, 'steps': 45635, 'batch_loss/train': 0.6791147187759634} +12/28/2021 23:21:31 - INFO - codeparrot_training - Step 45636: {'lr': 9.404651900596417e-06, 'samples': 23366144, 'steps': 45636, 'batch_loss/train': 0.6916749062947929} +12/28/2021 23:21:41 - INFO - codeparrot_training - Step 45637: {'lr': 9.400369501473938e-06, 'samples': 23366656, 'steps': 45637, 'batch_loss/train': 0.7643992078956217} +12/28/2021 23:21:52 - INFO - codeparrot_training - Step 45638: {'lr': 9.396088058884755e-06, 'samples': 23367168, 'steps': 45638, 'batch_loss/train': 0.5350519955682103} +12/28/2021 23:22:06 - INFO - codeparrot_training - Step 45639: {'lr': 9.391807572845885e-06, 'samples': 23367680, 'steps': 45639, 'batch_loss/train': 0.6372511379886419} +12/28/2021 23:22:17 - INFO - codeparrot_training - Step 45640: {'lr': 9.38752804337431e-06, 'samples': 23368192, 'steps': 45640, 'batch_loss/train': 0.6865973090752959} +12/28/2021 23:22:27 - INFO - codeparrot_training - Step 45641: {'lr': 9.383249470487104e-06, 'samples': 23368704, 'steps': 45641, 'batch_loss/train': 0.580607486423105} +12/28/2021 23:22:40 - INFO - codeparrot_training - Step 45642: {'lr': 9.378971854201223e-06, 'samples': 23369216, 'steps': 45642, 'batch_loss/train': 0.6176190366968513} +12/28/2021 23:22:51 - INFO - codeparrot_training - Step 45643: {'lr': 9.374695194533739e-06, 'samples': 23369728, 'steps': 45643, 'batch_loss/train': 0.8130861995741725} +12/28/2021 23:23:01 - INFO - codeparrot_training - Step 45644: {'lr': 9.370419491501525e-06, 'samples': 23370240, 'steps': 45644, 'batch_loss/train': 0.8792521012946963} +12/28/2021 23:23:12 - INFO - codeparrot_training - Step 45645: {'lr': 9.366144745121735e-06, 'samples': 23370752, 'steps': 45645, 'batch_loss/train': 1.2583612110465765} +12/28/2021 23:23:26 - INFO - codeparrot_training - Step 45646: {'lr': 9.3618709554113e-06, 'samples': 23371264, 'steps': 45646, 'batch_loss/train': 0.7192447246052325} +12/28/2021 23:23:36 - INFO - codeparrot_training - Step 45647: {'lr': 9.357598122387151e-06, 'samples': 23371776, 'steps': 45647, 'batch_loss/train': 0.7739065974019468} +12/28/2021 23:23:47 - INFO - codeparrot_training - Step 45648: {'lr': 9.353326246066358e-06, 'samples': 23372288, 'steps': 45648, 'batch_loss/train': 0.7645624531432986} +12/28/2021 23:23:59 - INFO - codeparrot_training - Step 45649: {'lr': 9.349055326465877e-06, 'samples': 23372800, 'steps': 45649, 'batch_loss/train': 0.6879671206697822} +12/28/2021 23:24:10 - INFO - codeparrot_training - Step 45650: {'lr': 9.34478536360267e-06, 'samples': 23373312, 'steps': 45650, 'batch_loss/train': 0.7199685946106911} +12/28/2021 23:24:21 - INFO - codeparrot_training - Step 45651: {'lr': 9.34051635749375e-06, 'samples': 23373824, 'steps': 45651, 'batch_loss/train': 0.5655174722196534} +12/28/2021 23:24:33 - INFO - codeparrot_training - Step 45652: {'lr': 9.336248308156048e-06, 'samples': 23374336, 'steps': 45652, 'batch_loss/train': 0.6953707197681069} +12/28/2021 23:24:43 - INFO - codeparrot_training - Step 45653: {'lr': 9.331981215606578e-06, 'samples': 23374848, 'steps': 45653, 'batch_loss/train': 0.7377681015059352} +12/28/2021 23:24:54 - INFO - codeparrot_training - Step 45654: {'lr': 9.327715079862242e-06, 'samples': 23375360, 'steps': 45654, 'batch_loss/train': 0.7305877958424389} +12/28/2021 23:25:04 - INFO - codeparrot_training - Step 45655: {'lr': 9.323449900940057e-06, 'samples': 23375872, 'steps': 45655, 'batch_loss/train': 0.7463720161467791} +12/28/2021 23:25:17 - INFO - codeparrot_training - Step 45656: {'lr': 9.31918567885695e-06, 'samples': 23376384, 'steps': 45656, 'batch_loss/train': 0.7640552613884211} +12/28/2021 23:25:28 - INFO - codeparrot_training - Step 45657: {'lr': 9.314922413629939e-06, 'samples': 23376896, 'steps': 45657, 'batch_loss/train': 0.8841690360568464} +12/28/2021 23:25:38 - INFO - codeparrot_training - Step 45658: {'lr': 9.31066010527587e-06, 'samples': 23377408, 'steps': 45658, 'batch_loss/train': 1.3830066239461303} +12/28/2021 23:25:52 - INFO - codeparrot_training - Step 45659: {'lr': 9.306398753811702e-06, 'samples': 23377920, 'steps': 45659, 'batch_loss/train': 2.054842656478286} +12/28/2021 23:26:03 - INFO - codeparrot_training - Step 45660: {'lr': 9.302138359254448e-06, 'samples': 23378432, 'steps': 45660, 'batch_loss/train': 0.5794867167714983} +12/28/2021 23:26:14 - INFO - codeparrot_training - Step 45661: {'lr': 9.297878921621011e-06, 'samples': 23378944, 'steps': 45661, 'batch_loss/train': 0.6261985418386757} +12/28/2021 23:26:24 - INFO - codeparrot_training - Step 45662: {'lr': 9.293620440928323e-06, 'samples': 23379456, 'steps': 45662, 'batch_loss/train': 0.6808488513343036} +12/28/2021 23:26:37 - INFO - codeparrot_training - Step 45663: {'lr': 9.28936291719329e-06, 'samples': 23379968, 'steps': 45663, 'batch_loss/train': 1.0043423939496279} +12/28/2021 23:26:47 - INFO - codeparrot_training - Step 45664: {'lr': 9.285106350432865e-06, 'samples': 23380480, 'steps': 45664, 'batch_loss/train': 0.7544946996495128} +12/28/2021 23:26:58 - INFO - codeparrot_training - Step 45665: {'lr': 9.280850740663982e-06, 'samples': 23380992, 'steps': 45665, 'batch_loss/train': 0.7164391689002514} +12/28/2021 23:27:10 - INFO - codeparrot_training - Step 45666: {'lr': 9.276596087903544e-06, 'samples': 23381504, 'steps': 45666, 'batch_loss/train': 0.6841828944161534} +12/28/2021 23:27:21 - INFO - codeparrot_training - Step 45667: {'lr': 9.272342392168454e-06, 'samples': 23382016, 'steps': 45667, 'batch_loss/train': 0.6801051227375865} +12/28/2021 23:27:31 - INFO - codeparrot_training - Step 45668: {'lr': 9.268089653475643e-06, 'samples': 23382528, 'steps': 45668, 'batch_loss/train': 0.6488636735011823} +12/28/2021 23:27:45 - INFO - codeparrot_training - Step 45669: {'lr': 9.26383787184204e-06, 'samples': 23383040, 'steps': 45669, 'batch_loss/train': 1.606963119469583} +12/28/2021 23:27:56 - INFO - codeparrot_training - Step 45670: {'lr': 9.259587047284467e-06, 'samples': 23383552, 'steps': 45670, 'batch_loss/train': 0.7359208134002984} +12/28/2021 23:28:07 - INFO - codeparrot_training - Step 45671: {'lr': 9.255337179819884e-06, 'samples': 23384064, 'steps': 45671, 'batch_loss/train': 0.7418794147670269} +12/28/2021 23:28:17 - INFO - codeparrot_training - Step 45672: {'lr': 9.251088269465218e-06, 'samples': 23384576, 'steps': 45672, 'batch_loss/train': 0.7959502246230841} +12/28/2021 23:28:29 - INFO - codeparrot_training - Step 45673: {'lr': 9.246840316237293e-06, 'samples': 23385088, 'steps': 45673, 'batch_loss/train': 0.7443661349825561} +12/28/2021 23:28:40 - INFO - codeparrot_training - Step 45674: {'lr': 9.242593320153036e-06, 'samples': 23385600, 'steps': 45674, 'batch_loss/train': 0.6388361863791943} +12/28/2021 23:28:51 - INFO - codeparrot_training - Step 45675: {'lr': 9.238347281229326e-06, 'samples': 23386112, 'steps': 45675, 'batch_loss/train': 0.8357615671120584} +12/28/2021 23:29:03 - INFO - codeparrot_training - Step 45676: {'lr': 9.234102199483007e-06, 'samples': 23386624, 'steps': 45676, 'batch_loss/train': 0.642466540215537} +12/28/2021 23:29:14 - INFO - codeparrot_training - Step 45677: {'lr': 9.229858074931014e-06, 'samples': 23387136, 'steps': 45677, 'batch_loss/train': 0.6826358947437257} +12/28/2021 23:29:25 - INFO - codeparrot_training - Step 45678: {'lr': 9.22561490759019e-06, 'samples': 23387648, 'steps': 45678, 'batch_loss/train': 0.736713427118957} +12/28/2021 23:29:39 - INFO - codeparrot_training - Step 45679: {'lr': 9.221372697477415e-06, 'samples': 23388160, 'steps': 45679, 'batch_loss/train': 0.6540428372099996} +12/28/2021 23:29:49 - INFO - codeparrot_training - Step 45680: {'lr': 9.217131444609534e-06, 'samples': 23388672, 'steps': 45680, 'batch_loss/train': 0.7337281135842204} +12/28/2021 23:30:00 - INFO - codeparrot_training - Step 45681: {'lr': 9.212891149003422e-06, 'samples': 23389184, 'steps': 45681, 'batch_loss/train': 0.807762417010963} +12/28/2021 23:30:12 - INFO - codeparrot_training - Step 45682: {'lr': 9.208651810675955e-06, 'samples': 23389696, 'steps': 45682, 'batch_loss/train': 0.6793023595237173} +12/28/2021 23:30:23 - INFO - codeparrot_training - Step 45683: {'lr': 9.204413429643981e-06, 'samples': 23390208, 'steps': 45683, 'batch_loss/train': 0.6748284557834268} +12/28/2021 23:30:33 - INFO - codeparrot_training - Step 45684: {'lr': 9.200176005924321e-06, 'samples': 23390720, 'steps': 45684, 'batch_loss/train': 0.7631050711497664} +12/28/2021 23:30:44 - INFO - codeparrot_training - Step 45685: {'lr': 9.195939539533793e-06, 'samples': 23391232, 'steps': 45685, 'batch_loss/train': 0.6646473980508745} +12/28/2021 23:30:56 - INFO - codeparrot_training - Step 45686: {'lr': 9.191704030489356e-06, 'samples': 23391744, 'steps': 45686, 'batch_loss/train': 0.6441572655458003} +12/28/2021 23:31:07 - INFO - codeparrot_training - Step 45687: {'lr': 9.187469478807747e-06, 'samples': 23392256, 'steps': 45687, 'batch_loss/train': 0.6839543762616813} +12/28/2021 23:31:17 - INFO - codeparrot_training - Step 45688: {'lr': 9.183235884505786e-06, 'samples': 23392768, 'steps': 45688, 'batch_loss/train': 0.7095047202892601} +12/28/2021 23:31:31 - INFO - codeparrot_training - Step 45689: {'lr': 9.179003247600432e-06, 'samples': 23393280, 'steps': 45689, 'batch_loss/train': 0.7823447426781058} +12/28/2021 23:31:42 - INFO - codeparrot_training - Step 45690: {'lr': 9.174771568108392e-06, 'samples': 23393792, 'steps': 45690, 'batch_loss/train': 0.6454932179767638} +12/28/2021 23:31:53 - INFO - codeparrot_training - Step 45691: {'lr': 9.170540846046543e-06, 'samples': 23394304, 'steps': 45691, 'batch_loss/train': 0.6548075219616294} +12/28/2021 23:32:05 - INFO - codeparrot_training - Step 45692: {'lr': 9.166311081431678e-06, 'samples': 23394816, 'steps': 45692, 'batch_loss/train': 0.8259477620013058} +12/28/2021 23:32:16 - INFO - codeparrot_training - Step 45693: {'lr': 9.162082274280614e-06, 'samples': 23395328, 'steps': 45693, 'batch_loss/train': 0.7378309040796012} +12/28/2021 23:32:26 - INFO - codeparrot_training - Step 45694: {'lr': 9.157854424610173e-06, 'samples': 23395840, 'steps': 45694, 'batch_loss/train': 0.7966404408216476} +12/28/2021 23:32:37 - INFO - codeparrot_training - Step 45695: {'lr': 9.153627532437203e-06, 'samples': 23396352, 'steps': 45695, 'batch_loss/train': 0.6771405348554254} +12/28/2021 23:32:49 - INFO - codeparrot_training - Step 45696: {'lr': 9.149401597778412e-06, 'samples': 23396864, 'steps': 45696, 'batch_loss/train': 0.7472879849374294} +12/28/2021 23:33:00 - INFO - codeparrot_training - Step 45697: {'lr': 9.145176620650703e-06, 'samples': 23397376, 'steps': 45697, 'batch_loss/train': 0.7197505026124418} +12/28/2021 23:33:10 - INFO - codeparrot_training - Step 45698: {'lr': 9.14095260107084e-06, 'samples': 23397888, 'steps': 45698, 'batch_loss/train': 0.7265005265944637} +12/28/2021 23:33:24 - INFO - codeparrot_training - Step 45699: {'lr': 9.136729539055589e-06, 'samples': 23398400, 'steps': 45699, 'batch_loss/train': 0.7111508082598448} +12/28/2021 23:33:35 - INFO - codeparrot_training - Step 45700: {'lr': 9.132507434621711e-06, 'samples': 23398912, 'steps': 45700, 'batch_loss/train': 0.7239737212657928} +12/28/2021 23:33:45 - INFO - codeparrot_training - Step 45701: {'lr': 9.128286287786087e-06, 'samples': 23399424, 'steps': 45701, 'batch_loss/train': 0.6755588632076979} +12/28/2021 23:33:57 - INFO - codeparrot_training - Step 45702: {'lr': 9.12406609856542e-06, 'samples': 23399936, 'steps': 45702, 'batch_loss/train': 0.655054347589612} +12/28/2021 23:34:08 - INFO - codeparrot_training - Step 45703: {'lr': 9.119846866976534e-06, 'samples': 23400448, 'steps': 45703, 'batch_loss/train': 0.6968518955400214} +12/28/2021 23:34:19 - INFO - codeparrot_training - Step 45704: {'lr': 9.11562859303619e-06, 'samples': 23400960, 'steps': 45704, 'batch_loss/train': 0.6701799726579338} +12/28/2021 23:34:29 - INFO - codeparrot_training - Step 45705: {'lr': 9.111411276761128e-06, 'samples': 23401472, 'steps': 45705, 'batch_loss/train': 0.7167601310648024} +12/28/2021 23:34:43 - INFO - codeparrot_training - Step 45706: {'lr': 9.107194918168138e-06, 'samples': 23401984, 'steps': 45706, 'batch_loss/train': 0.7171092797070742} +12/28/2021 23:34:54 - INFO - codeparrot_training - Step 45707: {'lr': 9.102979517273985e-06, 'samples': 23402496, 'steps': 45707, 'batch_loss/train': 0.6785991858923808} +12/28/2021 23:35:05 - INFO - codeparrot_training - Step 45708: {'lr': 9.098765074095433e-06, 'samples': 23403008, 'steps': 45708, 'batch_loss/train': 0.7671624585054815} +12/28/2021 23:35:17 - INFO - codeparrot_training - Step 45709: {'lr': 9.094551588649247e-06, 'samples': 23403520, 'steps': 45709, 'batch_loss/train': 0.983222461072728} +12/28/2021 23:35:28 - INFO - codeparrot_training - Step 45710: {'lr': 9.090339060952162e-06, 'samples': 23404032, 'steps': 45710, 'batch_loss/train': 0.6824026592075825} +12/28/2021 23:35:39 - INFO - codeparrot_training - Step 45711: {'lr': 9.08612749102089e-06, 'samples': 23404544, 'steps': 45711, 'batch_loss/train': 0.5227016085991636} +12/28/2021 23:35:51 - INFO - codeparrot_training - Step 45712: {'lr': 9.081916878872249e-06, 'samples': 23405056, 'steps': 45712, 'batch_loss/train': 0.7339955167844892} +12/28/2021 23:36:01 - INFO - codeparrot_training - Step 45713: {'lr': 9.077707224522919e-06, 'samples': 23405568, 'steps': 45713, 'batch_loss/train': 0.8000295693054795} +12/28/2021 23:36:12 - INFO - codeparrot_training - Step 45714: {'lr': 9.073498527989638e-06, 'samples': 23406080, 'steps': 45714, 'batch_loss/train': 0.7501046070829034} +12/28/2021 23:36:23 - INFO - codeparrot_training - Step 45715: {'lr': 9.069290789289197e-06, 'samples': 23406592, 'steps': 45715, 'batch_loss/train': 0.700144310016185} +12/28/2021 23:36:35 - INFO - codeparrot_training - Step 45716: {'lr': 9.065084008438251e-06, 'samples': 23407104, 'steps': 45716, 'batch_loss/train': 0.661013575270772} +12/28/2021 23:36:45 - INFO - codeparrot_training - Step 45717: {'lr': 9.060878185453591e-06, 'samples': 23407616, 'steps': 45717, 'batch_loss/train': 0.7731685768812895} +12/28/2021 23:36:56 - INFO - codeparrot_training - Step 45718: {'lr': 9.056673320351871e-06, 'samples': 23408128, 'steps': 45718, 'batch_loss/train': 0.8452318198978901} +12/28/2021 23:37:10 - INFO - codeparrot_training - Step 45719: {'lr': 9.052469413149855e-06, 'samples': 23408640, 'steps': 45719, 'batch_loss/train': 0.5823256080038846} +12/28/2021 23:37:21 - INFO - codeparrot_training - Step 45720: {'lr': 9.048266463864224e-06, 'samples': 23409152, 'steps': 45720, 'batch_loss/train': 0.83658342435956} +12/28/2021 23:37:31 - INFO - codeparrot_training - Step 45721: {'lr': 9.04406447251177e-06, 'samples': 23409664, 'steps': 45721, 'batch_loss/train': 0.6175441695377231} +12/28/2021 23:37:43 - INFO - codeparrot_training - Step 45722: {'lr': 9.039863439109036e-06, 'samples': 23410176, 'steps': 45722, 'batch_loss/train': 0.8399863122031093} +12/28/2021 23:37:54 - INFO - codeparrot_training - Step 45723: {'lr': 9.035663363672868e-06, 'samples': 23410688, 'steps': 45723, 'batch_loss/train': 0.7765606194734573} +12/28/2021 23:38:05 - INFO - codeparrot_training - Step 45724: {'lr': 9.03146424621995e-06, 'samples': 23411200, 'steps': 45724, 'batch_loss/train': 0.780063194106333} +12/28/2021 23:38:15 - INFO - codeparrot_training - Step 45725: {'lr': 9.027266086766905e-06, 'samples': 23411712, 'steps': 45725, 'batch_loss/train': 0.7415765612386167} +12/28/2021 23:38:27 - INFO - codeparrot_training - Step 45726: {'lr': 9.02306888533047e-06, 'samples': 23412224, 'steps': 45726, 'batch_loss/train': 0.7161385822109878} +12/28/2021 23:38:38 - INFO - codeparrot_training - Step 45727: {'lr': 9.018872641927356e-06, 'samples': 23412736, 'steps': 45727, 'batch_loss/train': 0.7153073288500309} +12/28/2021 23:38:49 - INFO - codeparrot_training - Step 45728: {'lr': 9.014677356574187e-06, 'samples': 23413248, 'steps': 45728, 'batch_loss/train': 0.675354699138552} +12/28/2021 23:39:03 - INFO - codeparrot_training - Step 45729: {'lr': 9.010483029287641e-06, 'samples': 23413760, 'steps': 45729, 'batch_loss/train': 0.7658509109169245} +12/28/2021 23:39:13 - INFO - codeparrot_training - Step 45730: {'lr': 9.00628966008446e-06, 'samples': 23414272, 'steps': 45730, 'batch_loss/train': 0.856325819157064} +12/28/2021 23:39:24 - INFO - codeparrot_training - Step 45731: {'lr': 9.002097248981238e-06, 'samples': 23414784, 'steps': 45731, 'batch_loss/train': 0.7716564573347569} +12/28/2021 23:39:36 - INFO - codeparrot_training - Step 45732: {'lr': 8.997905795994715e-06, 'samples': 23415296, 'steps': 45732, 'batch_loss/train': 0.6731334747746587} +12/28/2021 23:39:47 - INFO - codeparrot_training - Step 45733: {'lr': 8.993715301141513e-06, 'samples': 23415808, 'steps': 45733, 'batch_loss/train': 0.7484898846596479} +12/28/2021 23:39:57 - INFO - codeparrot_training - Step 45734: {'lr': 8.98952576443829e-06, 'samples': 23416320, 'steps': 45734, 'batch_loss/train': 0.8048099633306265} +12/28/2021 23:40:10 - INFO - codeparrot_training - Step 45735: {'lr': 8.985337185901694e-06, 'samples': 23416832, 'steps': 45735, 'batch_loss/train': 0.6611016753013246} +12/28/2021 23:40:21 - INFO - codeparrot_training - Step 45736: {'lr': 8.981149565548436e-06, 'samples': 23417344, 'steps': 45736, 'batch_loss/train': 0.675698540173471} +12/28/2021 23:40:32 - INFO - codeparrot_training - Step 45737: {'lr': 8.976962903395085e-06, 'samples': 23417856, 'steps': 45737, 'batch_loss/train': 0.7226616684347391} +12/28/2021 23:40:42 - INFO - codeparrot_training - Step 45738: {'lr': 8.972777199458355e-06, 'samples': 23418368, 'steps': 45738, 'batch_loss/train': 0.789858955424279} +12/28/2021 23:40:54 - INFO - codeparrot_training - Step 45739: {'lr': 8.968592453754837e-06, 'samples': 23418880, 'steps': 45739, 'batch_loss/train': 0.6596303957048804} +12/28/2021 23:41:05 - INFO - codeparrot_training - Step 45740: {'lr': 8.96440866630116e-06, 'samples': 23419392, 'steps': 45740, 'batch_loss/train': 0.8123812042176723} +12/28/2021 23:41:16 - INFO - codeparrot_training - Step 45741: {'lr': 8.960225837114033e-06, 'samples': 23419904, 'steps': 45741, 'batch_loss/train': 0.8464869596064091} +12/28/2021 23:41:28 - INFO - codeparrot_training - Step 45742: {'lr': 8.956043966210025e-06, 'samples': 23420416, 'steps': 45742, 'batch_loss/train': 0.7494609635323286} +12/28/2021 23:41:38 - INFO - codeparrot_training - Step 45743: {'lr': 8.951863053605736e-06, 'samples': 23420928, 'steps': 45743, 'batch_loss/train': 0.8619276685640216} +12/28/2021 23:41:49 - INFO - codeparrot_training - Step 45744: {'lr': 8.947683099317845e-06, 'samples': 23421440, 'steps': 45744, 'batch_loss/train': 0.7867833925411105} +12/28/2021 23:42:03 - INFO - codeparrot_training - Step 45745: {'lr': 8.943504103362949e-06, 'samples': 23421952, 'steps': 45745, 'batch_loss/train': 0.7399503854103386} +12/28/2021 23:42:14 - INFO - codeparrot_training - Step 45746: {'lr': 8.939326065757647e-06, 'samples': 23422464, 'steps': 45746, 'batch_loss/train': 0.723742539063096} +12/28/2021 23:42:24 - INFO - codeparrot_training - Step 45747: {'lr': 8.935148986518621e-06, 'samples': 23422976, 'steps': 45747, 'batch_loss/train': 0.737531915307045} +12/28/2021 23:42:35 - INFO - codeparrot_training - Step 45748: {'lr': 8.930972865662329e-06, 'samples': 23423488, 'steps': 45748, 'batch_loss/train': 0.7275257240980864} +12/28/2021 23:42:47 - INFO - codeparrot_training - Step 45749: {'lr': 8.926797703205508e-06, 'samples': 23424000, 'steps': 45749, 'batch_loss/train': 0.6204302771948278} +12/28/2021 23:42:58 - INFO - codeparrot_training - Step 45750: {'lr': 8.922623499164755e-06, 'samples': 23424512, 'steps': 45750, 'batch_loss/train': 0.7087686480954289} +12/28/2021 23:43:08 - INFO - codeparrot_training - Step 45751: {'lr': 8.91845025355656e-06, 'samples': 23425024, 'steps': 45751, 'batch_loss/train': 0.6841423632577062} +12/28/2021 23:43:21 - INFO - codeparrot_training - Step 45752: {'lr': 8.9142779663976e-06, 'samples': 23425536, 'steps': 45752, 'batch_loss/train': 0.7587518191430718} +12/28/2021 23:43:31 - INFO - codeparrot_training - Step 45753: {'lr': 8.910106637704473e-06, 'samples': 23426048, 'steps': 45753, 'batch_loss/train': 0.7785979760810733} +12/28/2021 23:43:42 - INFO - codeparrot_training - Step 45754: {'lr': 8.905936267493697e-06, 'samples': 23426560, 'steps': 45754, 'batch_loss/train': 0.8346336963586509} +12/28/2021 23:43:56 - INFO - codeparrot_training - Step 45755: {'lr': 8.901766855781839e-06, 'samples': 23427072, 'steps': 45755, 'batch_loss/train': 0.7318960186094046} +12/28/2021 23:44:07 - INFO - codeparrot_training - Step 45756: {'lr': 8.89759840258561e-06, 'samples': 23427584, 'steps': 45756, 'batch_loss/train': 0.8554856274276972} +12/28/2021 23:44:18 - INFO - codeparrot_training - Step 45757: {'lr': 8.89343090792144e-06, 'samples': 23428096, 'steps': 45757, 'batch_loss/train': 0.7875574491918087} +12/28/2021 23:44:28 - INFO - codeparrot_training - Step 45758: {'lr': 8.889264371805955e-06, 'samples': 23428608, 'steps': 45758, 'batch_loss/train': 1.1745776245370507} +12/28/2021 23:44:40 - INFO - codeparrot_training - Step 45759: {'lr': 8.885098794255726e-06, 'samples': 23429120, 'steps': 45759, 'batch_loss/train': 0.7839590637013316} +12/28/2021 23:44:51 - INFO - codeparrot_training - Step 45760: {'lr': 8.880934175287292e-06, 'samples': 23429632, 'steps': 45760, 'batch_loss/train': 0.8036775500513613} +12/28/2021 23:45:02 - INFO - codeparrot_training - Step 45761: {'lr': 8.876770514917226e-06, 'samples': 23430144, 'steps': 45761, 'batch_loss/train': 0.7321151616051793} +12/28/2021 23:45:14 - INFO - codeparrot_training - Step 45762: {'lr': 8.872607813162098e-06, 'samples': 23430656, 'steps': 45762, 'batch_loss/train': 0.7551571857184172} +12/28/2021 23:45:24 - INFO - codeparrot_training - Step 45763: {'lr': 8.868446070038394e-06, 'samples': 23431168, 'steps': 45763, 'batch_loss/train': 0.7605020962655544} +12/28/2021 23:45:35 - INFO - codeparrot_training - Step 45764: {'lr': 8.864285285562767e-06, 'samples': 23431680, 'steps': 45764, 'batch_loss/train': 0.7230873056687415} +12/28/2021 23:45:49 - INFO - codeparrot_training - Step 45765: {'lr': 8.86012545975165e-06, 'samples': 23432192, 'steps': 45765, 'batch_loss/train': 0.7346288450062275} +12/28/2021 23:46:00 - INFO - codeparrot_training - Step 45766: {'lr': 8.855966592621584e-06, 'samples': 23432704, 'steps': 45766, 'batch_loss/train': 0.7204971846658736} +12/28/2021 23:46:10 - INFO - codeparrot_training - Step 45767: {'lr': 8.851808684189194e-06, 'samples': 23433216, 'steps': 45767, 'batch_loss/train': 0.6267958502285182} +12/28/2021 23:46:21 - INFO - codeparrot_training - Step 45768: {'lr': 8.847651734470941e-06, 'samples': 23433728, 'steps': 45768, 'batch_loss/train': 0.7281816594768316} +12/28/2021 23:46:34 - INFO - codeparrot_training - Step 45769: {'lr': 8.843495743483394e-06, 'samples': 23434240, 'steps': 45769, 'batch_loss/train': 0.6995853413827717} +12/28/2021 23:46:44 - INFO - codeparrot_training - Step 45770: {'lr': 8.83934071124301e-06, 'samples': 23434752, 'steps': 45770, 'batch_loss/train': 0.5573684854898602} +12/28/2021 23:46:55 - INFO - codeparrot_training - Step 45771: {'lr': 8.835186637766363e-06, 'samples': 23435264, 'steps': 45771, 'batch_loss/train': 0.6001102142035961} +12/28/2021 23:47:07 - INFO - codeparrot_training - Step 45772: {'lr': 8.831033523069964e-06, 'samples': 23435776, 'steps': 45772, 'batch_loss/train': 0.7433645238634199} +12/28/2021 23:47:18 - INFO - codeparrot_training - Step 45773: {'lr': 8.826881367170304e-06, 'samples': 23436288, 'steps': 45773, 'batch_loss/train': 0.7347879535518587} +12/28/2021 23:47:28 - INFO - codeparrot_training - Step 45774: {'lr': 8.822730170083892e-06, 'samples': 23436800, 'steps': 45774, 'batch_loss/train': 0.8300438597798347} +12/28/2021 23:47:39 - INFO - codeparrot_training - Step 45775: {'lr': 8.818579931827276e-06, 'samples': 23437312, 'steps': 45775, 'batch_loss/train': 0.6826746929436922} +12/28/2021 23:47:53 - INFO - codeparrot_training - Step 45776: {'lr': 8.814430652416911e-06, 'samples': 23437824, 'steps': 45776, 'batch_loss/train': 0.7265763003379107} +12/28/2021 23:48:04 - INFO - codeparrot_training - Step 45777: {'lr': 8.810282331869257e-06, 'samples': 23438336, 'steps': 45777, 'batch_loss/train': 0.8521785461343825} +12/28/2021 23:48:14 - INFO - codeparrot_training - Step 45778: {'lr': 8.806134970200886e-06, 'samples': 23438848, 'steps': 45778, 'batch_loss/train': 0.7690949807874858} +12/28/2021 23:48:26 - INFO - codeparrot_training - Step 45779: {'lr': 8.801988567428254e-06, 'samples': 23439360, 'steps': 45779, 'batch_loss/train': 0.6830847151577473} +12/28/2021 23:48:37 - INFO - codeparrot_training - Step 45780: {'lr': 8.79784312356785e-06, 'samples': 23439872, 'steps': 45780, 'batch_loss/train': 0.8399868464330211} +12/28/2021 23:48:48 - INFO - codeparrot_training - Step 45781: {'lr': 8.793698638636077e-06, 'samples': 23440384, 'steps': 45781, 'batch_loss/train': 0.7132937116548419} +12/28/2021 23:49:02 - INFO - codeparrot_training - Step 45782: {'lr': 8.789555112649588e-06, 'samples': 23440896, 'steps': 45782, 'batch_loss/train': 0.7050235556671396} +12/28/2021 23:49:12 - INFO - codeparrot_training - Step 45783: {'lr': 8.785412545624677e-06, 'samples': 23441408, 'steps': 45783, 'batch_loss/train': 0.6305895007681102} +12/28/2021 23:49:23 - INFO - codeparrot_training - Step 45784: {'lr': 8.781270937577913e-06, 'samples': 23441920, 'steps': 45784, 'batch_loss/train': 0.6661429964005947} +12/28/2021 23:49:34 - INFO - codeparrot_training - Step 45785: {'lr': 8.777130288525725e-06, 'samples': 23442432, 'steps': 45785, 'batch_loss/train': 0.7267395202070475} +12/28/2021 23:49:46 - INFO - codeparrot_training - Step 45786: {'lr': 8.772990598484603e-06, 'samples': 23442944, 'steps': 45786, 'batch_loss/train': 0.7460448949132115} +12/28/2021 23:49:57 - INFO - codeparrot_training - Step 45787: {'lr': 8.76885186747095e-06, 'samples': 23443456, 'steps': 45787, 'batch_loss/train': 0.578784310258925} +12/28/2021 23:50:07 - INFO - codeparrot_training - Step 45788: {'lr': 8.76471409550128e-06, 'samples': 23443968, 'steps': 45788, 'batch_loss/train': 0.7454724656417966} +12/28/2021 23:50:20 - INFO - codeparrot_training - Step 45789: {'lr': 8.760577282592025e-06, 'samples': 23444480, 'steps': 45789, 'batch_loss/train': 0.7363296948606148} +12/28/2021 23:50:30 - INFO - codeparrot_training - Step 45790: {'lr': 8.756441428759615e-06, 'samples': 23444992, 'steps': 45790, 'batch_loss/train': 0.7400999299134128} +12/28/2021 23:50:41 - INFO - codeparrot_training - Step 45791: {'lr': 8.752306534020538e-06, 'samples': 23445504, 'steps': 45791, 'batch_loss/train': 0.6896647778339684} +12/28/2021 23:50:53 - INFO - codeparrot_training - Step 45792: {'lr': 8.74817259839114e-06, 'samples': 23446016, 'steps': 45792, 'batch_loss/train': 0.7378191389143467} +12/28/2021 23:51:04 - INFO - codeparrot_training - Step 45793: {'lr': 8.744039621887968e-06, 'samples': 23446528, 'steps': 45793, 'batch_loss/train': 0.6955171707086265} +12/28/2021 23:51:14 - INFO - codeparrot_training - Step 45794: {'lr': 8.739907604527392e-06, 'samples': 23447040, 'steps': 45794, 'batch_loss/train': 0.7375292298384011} +12/28/2021 23:51:25 - INFO - codeparrot_training - Step 45795: {'lr': 8.735776546325819e-06, 'samples': 23447552, 'steps': 45795, 'batch_loss/train': 0.7387779235141352} +12/28/2021 23:51:39 - INFO - codeparrot_training - Step 45796: {'lr': 8.731646447299707e-06, 'samples': 23448064, 'steps': 45796, 'batch_loss/train': 0.7852585259824991} +12/28/2021 23:51:50 - INFO - codeparrot_training - Step 45797: {'lr': 8.727517307465488e-06, 'samples': 23448576, 'steps': 45797, 'batch_loss/train': 0.7367368647828698} +12/28/2021 23:52:00 - INFO - codeparrot_training - Step 45798: {'lr': 8.723389126839537e-06, 'samples': 23449088, 'steps': 45798, 'batch_loss/train': 0.571079739369452} +12/28/2021 23:52:12 - INFO - codeparrot_training - Step 45799: {'lr': 8.719261905438313e-06, 'samples': 23449600, 'steps': 45799, 'batch_loss/train': 0.6757987008895725} +12/28/2021 23:52:23 - INFO - codeparrot_training - Step 45800: {'lr': 8.715135643278165e-06, 'samples': 23450112, 'steps': 45800, 'batch_loss/train': 0.7899298798292875} +12/28/2021 23:52:34 - INFO - codeparrot_training - Step 45801: {'lr': 8.711010340375552e-06, 'samples': 23450624, 'steps': 45801, 'batch_loss/train': 0.723763111163862} +12/28/2021 23:52:46 - INFO - codeparrot_training - Step 45802: {'lr': 8.706885996746905e-06, 'samples': 23451136, 'steps': 45802, 'batch_loss/train': 0.6058484862442128} +12/28/2021 23:52:57 - INFO - codeparrot_training - Step 45803: {'lr': 8.702762612408489e-06, 'samples': 23451648, 'steps': 45803, 'batch_loss/train': 0.7058139662258327} +12/28/2021 23:53:07 - INFO - codeparrot_training - Step 45804: {'lr': 8.698640187376788e-06, 'samples': 23452160, 'steps': 45804, 'batch_loss/train': 0.7231985640246421} +12/28/2021 23:53:18 - INFO - codeparrot_training - Step 45805: {'lr': 8.694518721668238e-06, 'samples': 23452672, 'steps': 45805, 'batch_loss/train': 0.5638486123061739} +12/28/2021 23:53:32 - INFO - codeparrot_training - Step 45806: {'lr': 8.690398215299128e-06, 'samples': 23453184, 'steps': 45806, 'batch_loss/train': 0.7663700794801116} +12/28/2021 23:53:43 - INFO - codeparrot_training - Step 45807: {'lr': 8.686278668285863e-06, 'samples': 23453696, 'steps': 45807, 'batch_loss/train': 0.771585582755506} +12/28/2021 23:53:53 - INFO - codeparrot_training - Step 45808: {'lr': 8.682160080644874e-06, 'samples': 23454208, 'steps': 45808, 'batch_loss/train': 0.6931608598679304} +12/28/2021 23:54:06 - INFO - codeparrot_training - Step 45809: {'lr': 8.678042452392481e-06, 'samples': 23454720, 'steps': 45809, 'batch_loss/train': 0.6683413884602487} +12/28/2021 23:54:16 - INFO - codeparrot_training - Step 45810: {'lr': 8.673925783545089e-06, 'samples': 23455232, 'steps': 45810, 'batch_loss/train': 0.7298377160914242} +12/28/2021 23:54:27 - INFO - codeparrot_training - Step 45811: {'lr': 8.669810074119017e-06, 'samples': 23455744, 'steps': 45811, 'batch_loss/train': 0.7545626806095243} +12/28/2021 23:54:39 - INFO - codeparrot_training - Step 45812: {'lr': 8.665695324130695e-06, 'samples': 23456256, 'steps': 45812, 'batch_loss/train': 0.6831257847952656} +12/28/2021 23:54:50 - INFO - codeparrot_training - Step 45813: {'lr': 8.661581533596418e-06, 'samples': 23456768, 'steps': 45813, 'batch_loss/train': 0.790289968252182} +12/28/2021 23:55:00 - INFO - codeparrot_training - Step 45814: {'lr': 8.65746870253256e-06, 'samples': 23457280, 'steps': 45814, 'batch_loss/train': 0.753431857097894} +12/28/2021 23:55:11 - INFO - codeparrot_training - Step 45815: {'lr': 8.6533568309555e-06, 'samples': 23457792, 'steps': 45815, 'batch_loss/train': 0.7970824539661407} +12/28/2021 23:55:25 - INFO - codeparrot_training - Step 45816: {'lr': 8.649245918881555e-06, 'samples': 23458304, 'steps': 45816, 'batch_loss/train': 0.6198735534271691} +12/28/2021 23:55:36 - INFO - codeparrot_training - Step 45817: {'lr': 8.6451359663271e-06, 'samples': 23458816, 'steps': 45817, 'batch_loss/train': 0.7763317599892616} +12/28/2021 23:55:47 - INFO - codeparrot_training - Step 45818: {'lr': 8.641026973308402e-06, 'samples': 23459328, 'steps': 45818, 'batch_loss/train': 0.7200931245461106} +12/28/2021 23:55:59 - INFO - codeparrot_training - Step 45819: {'lr': 8.636918939841893e-06, 'samples': 23459840, 'steps': 45819, 'batch_loss/train': 0.692627145908773} +12/28/2021 23:56:09 - INFO - codeparrot_training - Step 45820: {'lr': 8.632811865943835e-06, 'samples': 23460352, 'steps': 45820, 'batch_loss/train': 0.7332671396434307} +12/28/2021 23:56:20 - INFO - codeparrot_training - Step 45821: {'lr': 8.628705751630606e-06, 'samples': 23460864, 'steps': 45821, 'batch_loss/train': 0.6572561543434858} +12/28/2021 23:56:34 - INFO - codeparrot_training - Step 45822: {'lr': 8.624600596918469e-06, 'samples': 23461376, 'steps': 45822, 'batch_loss/train': 0.6823655287735164} +12/28/2021 23:56:44 - INFO - codeparrot_training - Step 45823: {'lr': 8.620496401823801e-06, 'samples': 23461888, 'steps': 45823, 'batch_loss/train': 0.6879399307072163} +12/28/2021 23:56:55 - INFO - codeparrot_training - Step 45824: {'lr': 8.616393166362895e-06, 'samples': 23462400, 'steps': 45824, 'batch_loss/train': 0.7237093388102949} +12/28/2021 23:57:07 - INFO - codeparrot_training - Step 45825: {'lr': 8.612290890552043e-06, 'samples': 23462912, 'steps': 45825, 'batch_loss/train': 0.6864173971116543} +12/28/2021 23:57:18 - INFO - codeparrot_training - Step 45826: {'lr': 8.60818957440762e-06, 'samples': 23463424, 'steps': 45826, 'batch_loss/train': 0.7682352447882295} +12/28/2021 23:57:28 - INFO - codeparrot_training - Step 45827: {'lr': 8.604089217945864e-06, 'samples': 23463936, 'steps': 45827, 'batch_loss/train': 0.6671780729666352} +12/28/2021 23:57:39 - INFO - codeparrot_training - Step 45828: {'lr': 8.599989821183124e-06, 'samples': 23464448, 'steps': 45828, 'batch_loss/train': 0.5811509842751548} +12/28/2021 23:57:51 - INFO - codeparrot_training - Step 45829: {'lr': 8.595891384135634e-06, 'samples': 23464960, 'steps': 45829, 'batch_loss/train': 0.7675413126125932} +12/28/2021 23:58:02 - INFO - codeparrot_training - Step 45830: {'lr': 8.591793906819745e-06, 'samples': 23465472, 'steps': 45830, 'batch_loss/train': 0.7231537448242307} +12/28/2021 23:58:12 - INFO - codeparrot_training - Step 45831: {'lr': 8.587697389251774e-06, 'samples': 23465984, 'steps': 45831, 'batch_loss/train': 0.7087960690259933} +12/28/2021 23:58:25 - INFO - codeparrot_training - Step 45832: {'lr': 8.583601831447935e-06, 'samples': 23466496, 'steps': 45832, 'batch_loss/train': 0.7714222054928541} +12/28/2021 23:58:35 - INFO - codeparrot_training - Step 45833: {'lr': 8.579507233424488e-06, 'samples': 23467008, 'steps': 45833, 'batch_loss/train': 0.7243831856176257} +12/28/2021 23:58:46 - INFO - codeparrot_training - Step 45834: {'lr': 8.57541359519784e-06, 'samples': 23467520, 'steps': 45834, 'batch_loss/train': 0.9487563520669937} +12/28/2021 23:58:59 - INFO - codeparrot_training - Step 45835: {'lr': 8.57132091678417e-06, 'samples': 23468032, 'steps': 45835, 'batch_loss/train': 0.7227851670468226} +12/28/2021 23:59:10 - INFO - codeparrot_training - Step 45836: {'lr': 8.567229198199717e-06, 'samples': 23468544, 'steps': 45836, 'batch_loss/train': 0.7859726883471012} +12/28/2021 23:59:21 - INFO - codeparrot_training - Step 45837: {'lr': 8.563138439460883e-06, 'samples': 23469056, 'steps': 45837, 'batch_loss/train': 0.5970732003916055} +12/28/2021 23:59:31 - INFO - codeparrot_training - Step 45838: {'lr': 8.559048640583795e-06, 'samples': 23469568, 'steps': 45838, 'batch_loss/train': 0.705728309461847} +12/28/2021 23:59:44 - INFO - codeparrot_training - Step 45839: {'lr': 8.554959801584773e-06, 'samples': 23470080, 'steps': 45839, 'batch_loss/train': 0.6794826179975644} +12/28/2021 23:59:54 - INFO - codeparrot_training - Step 45840: {'lr': 8.550871922480053e-06, 'samples': 23470592, 'steps': 45840, 'batch_loss/train': 0.6350155947729945} +12/29/2021 00:00:05 - INFO - codeparrot_training - Step 45841: {'lr': 8.546785003285929e-06, 'samples': 23471104, 'steps': 45841, 'batch_loss/train': 0.5752781943883747} +12/29/2021 00:00:17 - INFO - codeparrot_training - Step 45842: {'lr': 8.542699044018582e-06, 'samples': 23471616, 'steps': 45842, 'batch_loss/train': 0.583563888707431} +12/29/2021 00:00:28 - INFO - codeparrot_training - Step 45843: {'lr': 8.538614044694359e-06, 'samples': 23472128, 'steps': 45843, 'batch_loss/train': 0.7364309309050441} +12/29/2021 00:00:38 - INFO - codeparrot_training - Step 45844: {'lr': 8.53453000532936e-06, 'samples': 23472640, 'steps': 45844, 'batch_loss/train': 0.7384189977310598} +12/29/2021 00:00:52 - INFO - codeparrot_training - Step 45845: {'lr': 8.530446925939933e-06, 'samples': 23473152, 'steps': 45845, 'batch_loss/train': 0.6102749803103507} +12/29/2021 00:01:03 - INFO - codeparrot_training - Step 45846: {'lr': 8.526364806542258e-06, 'samples': 23473664, 'steps': 45846, 'batch_loss/train': 0.829132161103189} +12/29/2021 00:01:13 - INFO - codeparrot_training - Step 45847: {'lr': 8.522283647152574e-06, 'samples': 23474176, 'steps': 45847, 'batch_loss/train': 0.7649382203817368} +12/29/2021 00:01:24 - INFO - codeparrot_training - Step 45848: {'lr': 8.518203447787143e-06, 'samples': 23474688, 'steps': 45848, 'batch_loss/train': 0.6275448771193624} +12/29/2021 00:01:36 - INFO - codeparrot_training - Step 45849: {'lr': 8.514124208462121e-06, 'samples': 23475200, 'steps': 45849, 'batch_loss/train': 0.6990237273275852} +12/29/2021 00:01:47 - INFO - codeparrot_training - Step 45850: {'lr': 8.5100459291938e-06, 'samples': 23475712, 'steps': 45850, 'batch_loss/train': 0.708981170784682} +12/29/2021 00:01:58 - INFO - codeparrot_training - Step 45851: {'lr': 8.505968609998304e-06, 'samples': 23476224, 'steps': 45851, 'batch_loss/train': 0.8538805413991213} +12/29/2021 00:02:12 - INFO - codeparrot_training - Step 45852: {'lr': 8.501892250891929e-06, 'samples': 23476736, 'steps': 45852, 'batch_loss/train': 0.6949786636978388} +12/29/2021 00:02:22 - INFO - codeparrot_training - Step 45853: {'lr': 8.497816851890828e-06, 'samples': 23477248, 'steps': 45853, 'batch_loss/train': 0.807506229262799} +12/29/2021 00:02:33 - INFO - codeparrot_training - Step 45854: {'lr': 8.493742413011235e-06, 'samples': 23477760, 'steps': 45854, 'batch_loss/train': 0.7197435714770108} +12/29/2021 00:02:45 - INFO - codeparrot_training - Step 45855: {'lr': 8.489668934269306e-06, 'samples': 23478272, 'steps': 45855, 'batch_loss/train': 0.7100622588768601} +12/29/2021 00:02:56 - INFO - codeparrot_training - Step 45856: {'lr': 8.48559641568128e-06, 'samples': 23478784, 'steps': 45856, 'batch_loss/train': 0.7745723119005561} +12/29/2021 00:03:06 - INFO - codeparrot_training - Step 45857: {'lr': 8.481524857263334e-06, 'samples': 23479296, 'steps': 45857, 'batch_loss/train': 0.699072350282222} +12/29/2021 00:03:17 - INFO - codeparrot_training - Step 45858: {'lr': 8.477454259031653e-06, 'samples': 23479808, 'steps': 45858, 'batch_loss/train': 0.7167767849750817} +12/29/2021 00:03:29 - INFO - codeparrot_training - Step 45859: {'lr': 8.473384621002389e-06, 'samples': 23480320, 'steps': 45859, 'batch_loss/train': 0.7186492079636082} +12/29/2021 00:03:40 - INFO - codeparrot_training - Step 45860: {'lr': 8.46931594319178e-06, 'samples': 23480832, 'steps': 45860, 'batch_loss/train': 0.6941277054138482} +12/29/2021 00:03:50 - INFO - codeparrot_training - Step 45861: {'lr': 8.465248225615978e-06, 'samples': 23481344, 'steps': 45861, 'batch_loss/train': 0.7818750659935176} +12/29/2021 00:04:04 - INFO - codeparrot_training - Step 45862: {'lr': 8.461181468291112e-06, 'samples': 23481856, 'steps': 45862, 'batch_loss/train': 0.7428356460295618} +12/29/2021 00:04:15 - INFO - codeparrot_training - Step 45863: {'lr': 8.457115671233417e-06, 'samples': 23482368, 'steps': 45863, 'batch_loss/train': 0.7335401698946953} +12/29/2021 00:04:26 - INFO - codeparrot_training - Step 45864: {'lr': 8.45305083445902e-06, 'samples': 23482880, 'steps': 45864, 'batch_loss/train': 1.0612156114075333} +12/29/2021 00:04:38 - INFO - codeparrot_training - Step 45865: {'lr': 8.4489869579841e-06, 'samples': 23483392, 'steps': 45865, 'batch_loss/train': 0.7500274442136288} +12/29/2021 00:04:48 - INFO - codeparrot_training - Step 45866: {'lr': 8.444924041824786e-06, 'samples': 23483904, 'steps': 45866, 'batch_loss/train': 0.7473608274012804} +12/29/2021 00:04:59 - INFO - codeparrot_training - Step 45867: {'lr': 8.440862085997258e-06, 'samples': 23484416, 'steps': 45867, 'batch_loss/train': 0.7177558178082108} +12/29/2021 00:05:10 - INFO - codeparrot_training - Step 45868: {'lr': 8.436801090517644e-06, 'samples': 23484928, 'steps': 45868, 'batch_loss/train': 0.7756232377141714} +12/29/2021 00:05:22 - INFO - codeparrot_training - Step 45869: {'lr': 8.432741055402121e-06, 'samples': 23485440, 'steps': 45869, 'batch_loss/train': 0.7530767982825637} +12/29/2021 00:05:33 - INFO - codeparrot_training - Step 45870: {'lr': 8.428681980666763e-06, 'samples': 23485952, 'steps': 45870, 'batch_loss/train': 0.9254194712266326} +12/29/2021 00:05:43 - INFO - codeparrot_training - Step 45871: {'lr': 8.42462386632778e-06, 'samples': 23486464, 'steps': 45871, 'batch_loss/train': 0.658649027289357} +12/29/2021 00:05:55 - INFO - codeparrot_training - Step 45872: {'lr': 8.420566712401294e-06, 'samples': 23486976, 'steps': 45872, 'batch_loss/train': 0.5088713721488602} +12/29/2021 00:06:06 - INFO - codeparrot_training - Step 45873: {'lr': 8.416510518903353e-06, 'samples': 23487488, 'steps': 45873, 'batch_loss/train': 0.6799912229180336} +12/29/2021 00:06:17 - INFO - codeparrot_training - Step 45874: {'lr': 8.412455285850218e-06, 'samples': 23488000, 'steps': 45874, 'batch_loss/train': 0.740252458723262} +12/29/2021 00:06:30 - INFO - codeparrot_training - Step 45875: {'lr': 8.408401013257905e-06, 'samples': 23488512, 'steps': 45875, 'batch_loss/train': 0.6587042239261791} +12/29/2021 00:06:41 - INFO - codeparrot_training - Step 45876: {'lr': 8.404347701142595e-06, 'samples': 23489024, 'steps': 45876, 'batch_loss/train': 0.7023724457249045} +12/29/2021 00:06:52 - INFO - codeparrot_training - Step 45877: {'lr': 8.40029534952036e-06, 'samples': 23489536, 'steps': 45877, 'batch_loss/train': 0.7282546758651733} +12/29/2021 00:07:04 - INFO - codeparrot_training - Step 45878: {'lr': 8.396243958407324e-06, 'samples': 23490048, 'steps': 45878, 'batch_loss/train': 0.7865441273897886} +12/29/2021 00:07:15 - INFO - codeparrot_training - Step 45879: {'lr': 8.392193527819585e-06, 'samples': 23490560, 'steps': 45879, 'batch_loss/train': 0.6800500359386206} +12/29/2021 00:07:25 - INFO - codeparrot_training - Step 45880: {'lr': 8.388144057773272e-06, 'samples': 23491072, 'steps': 45880, 'batch_loss/train': 0.7132895449176431} +12/29/2021 00:07:36 - INFO - codeparrot_training - Step 45881: {'lr': 8.384095548284453e-06, 'samples': 23491584, 'steps': 45881, 'batch_loss/train': 0.769105423707515} +12/29/2021 00:07:50 - INFO - codeparrot_training - Step 45882: {'lr': 8.380047999369256e-06, 'samples': 23492096, 'steps': 45882, 'batch_loss/train': 0.8487921603955328} +12/29/2021 00:08:01 - INFO - codeparrot_training - Step 45883: {'lr': 8.376001411043776e-06, 'samples': 23492608, 'steps': 45883, 'batch_loss/train': 0.7835700260475278} +12/29/2021 00:08:11 - INFO - codeparrot_training - Step 45884: {'lr': 8.37195578332403e-06, 'samples': 23493120, 'steps': 45884, 'batch_loss/train': 0.7395263556391001} +12/29/2021 00:08:23 - INFO - codeparrot_training - Step 45885: {'lr': 8.367911116226173e-06, 'samples': 23493632, 'steps': 45885, 'batch_loss/train': 0.7505915723158978} +12/29/2021 00:08:34 - INFO - codeparrot_training - Step 45886: {'lr': 8.363867409766301e-06, 'samples': 23494144, 'steps': 45886, 'batch_loss/train': 0.6959035250474699} +12/29/2021 00:08:45 - INFO - codeparrot_training - Step 45887: {'lr': 8.359824663960403e-06, 'samples': 23494656, 'steps': 45887, 'batch_loss/train': 0.5453165043145418} +12/29/2021 00:08:57 - INFO - codeparrot_training - Step 45888: {'lr': 8.355782878824602e-06, 'samples': 23495168, 'steps': 45888, 'batch_loss/train': 0.6720680617727339} +12/29/2021 00:09:07 - INFO - codeparrot_training - Step 45889: {'lr': 8.351742054375028e-06, 'samples': 23495680, 'steps': 45889, 'batch_loss/train': 0.7677802318939939} +12/29/2021 00:09:18 - INFO - codeparrot_training - Step 45890: {'lr': 8.347702190627638e-06, 'samples': 23496192, 'steps': 45890, 'batch_loss/train': 0.7828388791531324} +12/29/2021 00:09:28 - INFO - codeparrot_training - Step 45891: {'lr': 8.34366328759853e-06, 'samples': 23496704, 'steps': 45891, 'batch_loss/train': 0.7348129358142614} +12/29/2021 00:09:42 - INFO - codeparrot_training - Step 45892: {'lr': 8.339625345303803e-06, 'samples': 23497216, 'steps': 45892, 'batch_loss/train': 0.7042009723372757} +12/29/2021 00:09:53 - INFO - codeparrot_training - Step 45893: {'lr': 8.335588363759443e-06, 'samples': 23497728, 'steps': 45893, 'batch_loss/train': 0.7585695607122034} +12/29/2021 00:10:04 - INFO - codeparrot_training - Step 45894: {'lr': 8.331552342981552e-06, 'samples': 23498240, 'steps': 45894, 'batch_loss/train': 0.7390518842730671} +12/29/2021 00:10:16 - INFO - codeparrot_training - Step 45895: {'lr': 8.327517282986197e-06, 'samples': 23498752, 'steps': 45895, 'batch_loss/train': 0.8697770384605974} +12/29/2021 00:10:27 - INFO - codeparrot_training - Step 45896: {'lr': 8.32348318378931e-06, 'samples': 23499264, 'steps': 45896, 'batch_loss/train': 0.7075217714300379} +12/29/2021 00:10:37 - INFO - codeparrot_training - Step 45897: {'lr': 8.319450045407045e-06, 'samples': 23499776, 'steps': 45897, 'batch_loss/train': 0.7940001413226128} +12/29/2021 00:10:51 - INFO - codeparrot_training - Step 45898: {'lr': 8.31541786785539e-06, 'samples': 23500288, 'steps': 45898, 'batch_loss/train': 0.7849208964034915} +12/29/2021 00:11:02 - INFO - codeparrot_training - Step 45899: {'lr': 8.31138665115036e-06, 'samples': 23500800, 'steps': 45899, 'batch_loss/train': 0.7370990357594565} +12/29/2021 00:11:12 - INFO - codeparrot_training - Step 45900: {'lr': 8.307356395308024e-06, 'samples': 23501312, 'steps': 45900, 'batch_loss/train': 0.6697063744068146} +12/29/2021 00:11:23 - INFO - codeparrot_training - Step 45901: {'lr': 8.30332710034437e-06, 'samples': 23501824, 'steps': 45901, 'batch_loss/train': 0.692655669467058} +12/29/2021 00:11:35 - INFO - codeparrot_training - Step 45902: {'lr': 8.299298766275415e-06, 'samples': 23502336, 'steps': 45902, 'batch_loss/train': 0.742781805456616} +12/29/2021 00:11:45 - INFO - codeparrot_training - Step 45903: {'lr': 8.295271393117172e-06, 'samples': 23502848, 'steps': 45903, 'batch_loss/train': 0.6456279149278998} +12/29/2021 00:11:56 - INFO - codeparrot_training - Step 45904: {'lr': 8.291244980885681e-06, 'samples': 23503360, 'steps': 45904, 'batch_loss/train': 0.7076173331588507} +12/29/2021 00:12:08 - INFO - codeparrot_training - Step 45905: {'lr': 8.287219529596935e-06, 'samples': 23503872, 'steps': 45905, 'batch_loss/train': 0.6200220019090921} +12/29/2021 00:12:19 - INFO - codeparrot_training - Step 45906: {'lr': 8.283195039266943e-06, 'samples': 23504384, 'steps': 45906, 'batch_loss/train': 0.7321348460391164} +12/29/2021 00:12:29 - INFO - codeparrot_training - Step 45907: {'lr': 8.279171509911699e-06, 'samples': 23504896, 'steps': 45907, 'batch_loss/train': 0.7090786879416555} +12/29/2021 00:12:42 - INFO - codeparrot_training - Step 45908: {'lr': 8.275148941547183e-06, 'samples': 23505408, 'steps': 45908, 'batch_loss/train': 0.6757059150841087} +12/29/2021 00:12:52 - INFO - codeparrot_training - Step 45909: {'lr': 8.271127334189443e-06, 'samples': 23505920, 'steps': 45909, 'batch_loss/train': 0.7220122233848087} +12/29/2021 00:13:03 - INFO - codeparrot_training - Step 45910: {'lr': 8.267106687854381e-06, 'samples': 23506432, 'steps': 45910, 'batch_loss/train': 0.8641180284321308} +12/29/2021 00:13:17 - INFO - codeparrot_training - Step 45911: {'lr': 8.26308700255804e-06, 'samples': 23506944, 'steps': 45911, 'batch_loss/train': 0.72084323852323} +12/29/2021 00:13:27 - INFO - codeparrot_training - Step 45912: {'lr': 8.259068278316406e-06, 'samples': 23507456, 'steps': 45912, 'batch_loss/train': 0.8033717847429216} +12/29/2021 00:13:38 - INFO - codeparrot_training - Step 45913: {'lr': 8.25505051514544e-06, 'samples': 23507968, 'steps': 45913, 'batch_loss/train': 0.7471996219828725} +12/29/2021 00:13:49 - INFO - codeparrot_training - Step 45914: {'lr': 8.251033713061073e-06, 'samples': 23508480, 'steps': 45914, 'batch_loss/train': 0.7887028479017317} +12/29/2021 00:14:01 - INFO - codeparrot_training - Step 45915: {'lr': 8.247017872079377e-06, 'samples': 23508992, 'steps': 45915, 'batch_loss/train': 0.7744613456306979} +12/29/2021 00:14:12 - INFO - codeparrot_training - Step 45916: {'lr': 8.243002992216198e-06, 'samples': 23509504, 'steps': 45916, 'batch_loss/train': 0.7088074064813554} +12/29/2021 00:14:22 - INFO - codeparrot_training - Step 45917: {'lr': 8.238989073487579e-06, 'samples': 23510016, 'steps': 45917, 'batch_loss/train': 0.757908150088042} +12/29/2021 00:14:35 - INFO - codeparrot_training - Step 45918: {'lr': 8.234976115909453e-06, 'samples': 23510528, 'steps': 45918, 'batch_loss/train': 0.6543422963004559} +12/29/2021 00:14:45 - INFO - codeparrot_training - Step 45919: {'lr': 8.23096411949778e-06, 'samples': 23511040, 'steps': 45919, 'batch_loss/train': 0.4605534464935772} +12/29/2021 00:14:56 - INFO - codeparrot_training - Step 45920: {'lr': 8.22695308426849e-06, 'samples': 23511552, 'steps': 45920, 'batch_loss/train': 0.685928335878998} +12/29/2021 00:15:07 - INFO - codeparrot_training - Step 45921: {'lr': 8.222943010237572e-06, 'samples': 23512064, 'steps': 45921, 'batch_loss/train': 0.6867907484993339} +12/29/2021 00:15:20 - INFO - codeparrot_training - Step 45922: {'lr': 8.218933897420927e-06, 'samples': 23512576, 'steps': 45922, 'batch_loss/train': 0.8677508528344333} +12/29/2021 00:15:31 - INFO - codeparrot_training - Step 45923: {'lr': 8.214925745834489e-06, 'samples': 23513088, 'steps': 45923, 'batch_loss/train': 0.6681092967046425} +12/29/2021 00:15:42 - INFO - codeparrot_training - Step 45924: {'lr': 8.210918555494246e-06, 'samples': 23513600, 'steps': 45924, 'batch_loss/train': 0.8505732230842113} +12/29/2021 00:15:54 - INFO - codeparrot_training - Step 45925: {'lr': 8.206912326416071e-06, 'samples': 23514112, 'steps': 45925, 'batch_loss/train': 0.5908297869609669} +12/29/2021 00:16:05 - INFO - codeparrot_training - Step 45926: {'lr': 8.2029070586159e-06, 'samples': 23514624, 'steps': 45926, 'batch_loss/train': 0.7207009163685143} +12/29/2021 00:16:15 - INFO - codeparrot_training - Step 45927: {'lr': 8.198902752109716e-06, 'samples': 23515136, 'steps': 45927, 'batch_loss/train': 0.6977292550727725} +12/29/2021 00:16:29 - INFO - codeparrot_training - Step 45928: {'lr': 8.19489940691337e-06, 'samples': 23515648, 'steps': 45928, 'batch_loss/train': 0.7869254238903522} +12/29/2021 00:16:40 - INFO - codeparrot_training - Step 45929: {'lr': 8.19089702304282e-06, 'samples': 23516160, 'steps': 45929, 'batch_loss/train': 0.6917821569368243} +12/29/2021 00:16:50 - INFO - codeparrot_training - Step 45930: {'lr': 8.186895600513944e-06, 'samples': 23516672, 'steps': 45930, 'batch_loss/train': 0.7608742658048868} +12/29/2021 00:17:01 - INFO - codeparrot_training - Step 45931: {'lr': 8.182895139342645e-06, 'samples': 23517184, 'steps': 45931, 'batch_loss/train': 0.6987920677638613} +12/29/2021 00:17:13 - INFO - codeparrot_training - Step 45932: {'lr': 8.178895639544882e-06, 'samples': 23517696, 'steps': 45932, 'batch_loss/train': 0.6686399793252349} +12/29/2021 00:17:24 - INFO - codeparrot_training - Step 45933: {'lr': 8.174897101136502e-06, 'samples': 23518208, 'steps': 45933, 'batch_loss/train': 0.7463299809023738} +12/29/2021 00:17:34 - INFO - codeparrot_training - Step 45934: {'lr': 8.170899524133412e-06, 'samples': 23518720, 'steps': 45934, 'batch_loss/train': 0.7023141696117818} +12/29/2021 00:17:47 - INFO - codeparrot_training - Step 45935: {'lr': 8.166902908551571e-06, 'samples': 23519232, 'steps': 45935, 'batch_loss/train': 0.7883778037503362} +12/29/2021 00:17:57 - INFO - codeparrot_training - Step 45936: {'lr': 8.162907254406742e-06, 'samples': 23519744, 'steps': 45936, 'batch_loss/train': 0.5468595686834306} +12/29/2021 00:18:08 - INFO - codeparrot_training - Step 45937: {'lr': 8.158912561714887e-06, 'samples': 23520256, 'steps': 45937, 'batch_loss/train': 2.2797786127775908} +12/29/2021 00:18:22 - INFO - codeparrot_training - Step 45938: {'lr': 8.154918830491937e-06, 'samples': 23520768, 'steps': 45938, 'batch_loss/train': 0.7773715853691101} +12/29/2021 00:18:33 - INFO - codeparrot_training - Step 45939: {'lr': 8.150926060753683e-06, 'samples': 23521280, 'steps': 45939, 'batch_loss/train': 0.7608843627385795} +12/29/2021 00:18:43 - INFO - codeparrot_training - Step 45940: {'lr': 8.146934252515974e-06, 'samples': 23521792, 'steps': 45940, 'batch_loss/train': 0.6967465593479574} +12/29/2021 00:18:54 - INFO - codeparrot_training - Step 45941: {'lr': 8.142943405794829e-06, 'samples': 23522304, 'steps': 45941, 'batch_loss/train': 0.5909591112285852} +12/29/2021 00:19:06 - INFO - codeparrot_training - Step 45942: {'lr': 8.138953520605952e-06, 'samples': 23522816, 'steps': 45942, 'batch_loss/train': 0.7973785204812884} +12/29/2021 00:19:17 - INFO - codeparrot_training - Step 45943: {'lr': 8.134964596965305e-06, 'samples': 23523328, 'steps': 45943, 'batch_loss/train': 0.8302529789507389} +12/29/2021 00:19:27 - INFO - codeparrot_training - Step 45944: {'lr': 8.130976634888682e-06, 'samples': 23523840, 'steps': 45944, 'batch_loss/train': 0.708065964281559} +12/29/2021 00:19:39 - INFO - codeparrot_training - Step 45945: {'lr': 8.126989634391985e-06, 'samples': 23524352, 'steps': 45945, 'batch_loss/train': 0.7203690647147596} +12/29/2021 00:19:50 - INFO - codeparrot_training - Step 45946: {'lr': 8.123003595491064e-06, 'samples': 23524864, 'steps': 45946, 'batch_loss/train': 0.7070555910468102} +12/29/2021 00:20:00 - INFO - codeparrot_training - Step 45947: {'lr': 8.119018518201737e-06, 'samples': 23525376, 'steps': 45947, 'batch_loss/train': 0.7463821256533265} +12/29/2021 00:20:13 - INFO - codeparrot_training - Step 45948: {'lr': 8.115034402539856e-06, 'samples': 23525888, 'steps': 45948, 'batch_loss/train': 0.727501580491662} +12/29/2021 00:20:23 - INFO - codeparrot_training - Step 45949: {'lr': 8.111051248521295e-06, 'samples': 23526400, 'steps': 45949, 'batch_loss/train': 0.7676265952177346} +12/29/2021 00:20:34 - INFO - codeparrot_training - Step 45950: {'lr': 8.107069056161848e-06, 'samples': 23526912, 'steps': 45950, 'batch_loss/train': 0.7829691204242408} +12/29/2021 00:20:44 - INFO - codeparrot_training - Step 45951: {'lr': 8.103087825477334e-06, 'samples': 23527424, 'steps': 45951, 'batch_loss/train': 0.775394706055522} +12/29/2021 00:20:58 - INFO - codeparrot_training - Step 45952: {'lr': 8.099107556483632e-06, 'samples': 23527936, 'steps': 45952, 'batch_loss/train': 0.7744486443698406} +12/29/2021 00:21:09 - INFO - codeparrot_training - Step 45953: {'lr': 8.09512824919656e-06, 'samples': 23528448, 'steps': 45953, 'batch_loss/train': 0.6965901623480022} +12/29/2021 00:21:20 - INFO - codeparrot_training - Step 45954: {'lr': 8.091149903631884e-06, 'samples': 23528960, 'steps': 45954, 'batch_loss/train': 0.7916807951405644} +12/29/2021 00:21:32 - INFO - codeparrot_training - Step 45955: {'lr': 8.087172519805452e-06, 'samples': 23529472, 'steps': 45955, 'batch_loss/train': 0.7050600721267983} +12/29/2021 00:21:42 - INFO - codeparrot_training - Step 45956: {'lr': 8.083196097733087e-06, 'samples': 23529984, 'steps': 45956, 'batch_loss/train': 0.8822928350418806} +12/29/2021 00:21:53 - INFO - codeparrot_training - Step 45957: {'lr': 8.079220637430606e-06, 'samples': 23530496, 'steps': 45957, 'batch_loss/train': 0.7469110791571438} +12/29/2021 00:22:06 - INFO - codeparrot_training - Step 45958: {'lr': 8.075246138913777e-06, 'samples': 23531008, 'steps': 45958, 'batch_loss/train': 0.761259094811976} +12/29/2021 00:22:16 - INFO - codeparrot_training - Step 45959: {'lr': 8.071272602198421e-06, 'samples': 23531520, 'steps': 45959, 'batch_loss/train': 1.1236356510780752} +12/29/2021 00:22:27 - INFO - codeparrot_training - Step 45960: {'lr': 8.067300027300356e-06, 'samples': 23532032, 'steps': 45960, 'batch_loss/train': 0.8083319319412112} +12/29/2021 00:22:37 - INFO - codeparrot_training - Step 45961: {'lr': 8.063328414235348e-06, 'samples': 23532544, 'steps': 45961, 'batch_loss/train': 0.7077586890663952} +12/29/2021 00:22:51 - INFO - codeparrot_training - Step 45962: {'lr': 8.059357763019165e-06, 'samples': 23533056, 'steps': 45962, 'batch_loss/train': 0.7960174083709717} +12/29/2021 00:23:02 - INFO - codeparrot_training - Step 45963: {'lr': 8.055388073667653e-06, 'samples': 23533568, 'steps': 45963, 'batch_loss/train': 0.8330558594316244} +12/29/2021 00:23:13 - INFO - codeparrot_training - Step 45964: {'lr': 8.051419346196603e-06, 'samples': 23534080, 'steps': 45964, 'batch_loss/train': 0.6916559273377061} +12/29/2021 00:23:25 - INFO - codeparrot_training - Step 45965: {'lr': 8.047451580621702e-06, 'samples': 23534592, 'steps': 45965, 'batch_loss/train': 0.7546906294301152} +12/29/2021 00:23:36 - INFO - codeparrot_training - Step 45966: {'lr': 8.043484776958738e-06, 'samples': 23535104, 'steps': 45966, 'batch_loss/train': 0.7510942325461656} +12/29/2021 00:23:46 - INFO - codeparrot_training - Step 45967: {'lr': 8.03951893522359e-06, 'samples': 23535616, 'steps': 45967, 'batch_loss/train': 1.0909689301624894} +12/29/2021 00:24:00 - INFO - codeparrot_training - Step 45968: {'lr': 8.035554055431937e-06, 'samples': 23536128, 'steps': 45968, 'batch_loss/train': 0.775720402598381} +12/29/2021 00:24:11 - INFO - codeparrot_training - Step 45969: {'lr': 8.031590137599549e-06, 'samples': 23536640, 'steps': 45969, 'batch_loss/train': 0.771659036166966} +12/29/2021 00:24:22 - INFO - codeparrot_training - Step 45970: {'lr': 8.027627181742186e-06, 'samples': 23537152, 'steps': 45970, 'batch_loss/train': 0.6951901218853891} +12/29/2021 00:24:32 - INFO - codeparrot_training - Step 45971: {'lr': 8.023665187875618e-06, 'samples': 23537664, 'steps': 45971, 'batch_loss/train': 0.6865398911759257} +12/29/2021 00:24:44 - INFO - codeparrot_training - Step 45972: {'lr': 8.019704156015606e-06, 'samples': 23538176, 'steps': 45972, 'batch_loss/train': 0.7026589440647513} +12/29/2021 00:24:55 - INFO - codeparrot_training - Step 45973: {'lr': 8.015744086177861e-06, 'samples': 23538688, 'steps': 45973, 'batch_loss/train': 0.631228398764506} +12/29/2021 00:25:06 - INFO - codeparrot_training - Step 45974: {'lr': 8.01178497837815e-06, 'samples': 23539200, 'steps': 45974, 'batch_loss/train': 0.7088735485449433} +12/29/2021 00:25:18 - INFO - codeparrot_training - Step 45975: {'lr': 8.007826832632236e-06, 'samples': 23539712, 'steps': 45975, 'batch_loss/train': 0.48862741189077497} +12/29/2021 00:25:28 - INFO - codeparrot_training - Step 45976: {'lr': 8.00386964895583e-06, 'samples': 23540224, 'steps': 45976, 'batch_loss/train': 0.7815499906428158} +12/29/2021 00:25:39 - INFO - codeparrot_training - Step 45977: {'lr': 7.999913427364614e-06, 'samples': 23540736, 'steps': 45977, 'batch_loss/train': 0.7022904464974999} +12/29/2021 00:25:53 - INFO - codeparrot_training - Step 45978: {'lr': 7.995958167874406e-06, 'samples': 23541248, 'steps': 45978, 'batch_loss/train': 0.7625417213421315} +12/29/2021 00:26:04 - INFO - codeparrot_training - Step 45979: {'lr': 7.992003870500919e-06, 'samples': 23541760, 'steps': 45979, 'batch_loss/train': 0.8293354790657759} +12/29/2021 00:26:14 - INFO - codeparrot_training - Step 45980: {'lr': 7.988050535259805e-06, 'samples': 23542272, 'steps': 45980, 'batch_loss/train': 0.7723077200353146} +12/29/2021 00:26:25 - INFO - codeparrot_training - Step 45981: {'lr': 7.984098162166802e-06, 'samples': 23542784, 'steps': 45981, 'batch_loss/train': 0.7690677363425493} +12/29/2021 00:26:37 - INFO - codeparrot_training - Step 45982: {'lr': 7.980146751237677e-06, 'samples': 23543296, 'steps': 45982, 'batch_loss/train': 0.7653434197418392} +12/29/2021 00:26:48 - INFO - codeparrot_training - Step 45983: {'lr': 7.976196302488109e-06, 'samples': 23543808, 'steps': 45983, 'batch_loss/train': 0.7371156215667725} +12/29/2021 00:26:58 - INFO - codeparrot_training - Step 45984: {'lr': 7.97224681593378e-06, 'samples': 23544320, 'steps': 45984, 'batch_loss/train': 0.6547699109651148} +12/29/2021 00:27:10 - INFO - codeparrot_training - Step 45985: {'lr': 7.968298291590403e-06, 'samples': 23544832, 'steps': 45985, 'batch_loss/train': 1.229565274901688} +12/29/2021 00:27:21 - INFO - codeparrot_training - Step 45986: {'lr': 7.964350729473713e-06, 'samples': 23545344, 'steps': 45986, 'batch_loss/train': 0.6364777218550444} +12/29/2021 00:27:32 - INFO - codeparrot_training - Step 45987: {'lr': 7.960404129599363e-06, 'samples': 23545856, 'steps': 45987, 'batch_loss/train': 0.6772932172752917} +12/29/2021 00:27:44 - INFO - codeparrot_training - Step 45988: {'lr': 7.956458491983066e-06, 'samples': 23546368, 'steps': 45988, 'batch_loss/train': 0.8234202042222023} +12/29/2021 00:27:55 - INFO - codeparrot_training - Step 45989: {'lr': 7.952513816640473e-06, 'samples': 23546880, 'steps': 45989, 'batch_loss/train': 0.6999631971120834} +12/29/2021 00:28:05 - INFO - codeparrot_training - Step 45990: {'lr': 7.948570103587322e-06, 'samples': 23547392, 'steps': 45990, 'batch_loss/train': 0.5882127593213227} +12/29/2021 00:28:16 - INFO - codeparrot_training - Step 45991: {'lr': 7.94462735283924e-06, 'samples': 23547904, 'steps': 45991, 'batch_loss/train': 0.6951046936446801} +12/29/2021 00:28:30 - INFO - codeparrot_training - Step 45992: {'lr': 7.94068556441191e-06, 'samples': 23548416, 'steps': 45992, 'batch_loss/train': 0.7766945324838161} +12/29/2021 00:28:40 - INFO - codeparrot_training - Step 45993: {'lr': 7.936744738321066e-06, 'samples': 23548928, 'steps': 45993, 'batch_loss/train': 0.7070937417447567} +12/29/2021 00:28:51 - INFO - codeparrot_training - Step 45994: {'lr': 7.932804874582283e-06, 'samples': 23549440, 'steps': 45994, 'batch_loss/train': 0.658990434370935} +12/29/2021 00:29:03 - INFO - codeparrot_training - Step 45995: {'lr': 7.928865973211268e-06, 'samples': 23549952, 'steps': 45995, 'batch_loss/train': 0.7550226468592882} +12/29/2021 00:29:14 - INFO - codeparrot_training - Step 45996: {'lr': 7.924928034223705e-06, 'samples': 23550464, 'steps': 45996, 'batch_loss/train': 0.6961405258625746} +12/29/2021 00:29:24 - INFO - codeparrot_training - Step 45997: {'lr': 7.920991057635218e-06, 'samples': 23550976, 'steps': 45997, 'batch_loss/train': 1.3225031602196395} +12/29/2021 00:29:38 - INFO - codeparrot_training - Step 45998: {'lr': 7.917055043461436e-06, 'samples': 23551488, 'steps': 45998, 'batch_loss/train': 0.7731918497011065} +12/29/2021 00:29:49 - INFO - codeparrot_training - Step 45999: {'lr': 7.913119991718064e-06, 'samples': 23552000, 'steps': 45999, 'batch_loss/train': 0.7835453227162361} +12/29/2021 00:30:00 - INFO - codeparrot_training - Step 46000: {'lr': 7.909185902420707e-06, 'samples': 23552512, 'steps': 46000, 'batch_loss/train': 0.619580915896222} +12/29/2021 00:30:12 - INFO - codeparrot_training - Step 46001: {'lr': 7.905252775585014e-06, 'samples': 23553024, 'steps': 46001, 'batch_loss/train': 0.7760551385581493} +12/29/2021 00:30:22 - INFO - codeparrot_training - Step 46002: {'lr': 7.90132061122667e-06, 'samples': 23553536, 'steps': 46002, 'batch_loss/train': 0.6561717865988612} +12/29/2021 00:30:33 - INFO - codeparrot_training - Step 46003: {'lr': 7.89738940936119e-06, 'samples': 23554048, 'steps': 46003, 'batch_loss/train': 0.7552500236779451} +12/29/2021 00:30:44 - INFO - codeparrot_training - Step 46004: {'lr': 7.893459170004308e-06, 'samples': 23554560, 'steps': 46004, 'batch_loss/train': 0.8095080283237621} +12/29/2021 00:30:56 - INFO - codeparrot_training - Step 46005: {'lr': 7.889529893171627e-06, 'samples': 23555072, 'steps': 46005, 'batch_loss/train': 0.6682095299474895} +12/29/2021 00:31:06 - INFO - codeparrot_training - Step 46006: {'lr': 7.885601578878742e-06, 'samples': 23555584, 'steps': 46006, 'batch_loss/train': 0.8173375884070992} +12/29/2021 00:31:17 - INFO - codeparrot_training - Step 46007: {'lr': 7.881674227141283e-06, 'samples': 23556096, 'steps': 46007, 'batch_loss/train': 0.8249724647030234} +12/29/2021 00:31:31 - INFO - codeparrot_training - Step 46008: {'lr': 7.877747837974902e-06, 'samples': 23556608, 'steps': 46008, 'batch_loss/train': 0.7390121463686228} +12/29/2021 00:31:42 - INFO - codeparrot_training - Step 46009: {'lr': 7.873822411395143e-06, 'samples': 23557120, 'steps': 46009, 'batch_loss/train': 0.9979760418646038} +12/29/2021 00:31:52 - INFO - codeparrot_training - Step 46010: {'lr': 7.869897947417631e-06, 'samples': 23557632, 'steps': 46010, 'batch_loss/train': 0.7872964357957244} +12/29/2021 00:32:05 - INFO - codeparrot_training - Step 46011: {'lr': 7.865974446057993e-06, 'samples': 23558144, 'steps': 46011, 'batch_loss/train': 0.658901026006788} +12/29/2021 00:32:15 - INFO - codeparrot_training - Step 46012: {'lr': 7.86205190733183e-06, 'samples': 23558656, 'steps': 46012, 'batch_loss/train': 0.7142264756839722} +12/29/2021 00:32:26 - INFO - codeparrot_training - Step 46013: {'lr': 7.85813033125471e-06, 'samples': 23559168, 'steps': 46013, 'batch_loss/train': 0.7885884018614888} +12/29/2021 00:32:40 - INFO - codeparrot_training - Step 46014: {'lr': 7.854209717842232e-06, 'samples': 23559680, 'steps': 46014, 'batch_loss/train': 0.6219241379294544} +12/29/2021 00:32:51 - INFO - codeparrot_training - Step 46015: {'lr': 7.850290067109966e-06, 'samples': 23560192, 'steps': 46015, 'batch_loss/train': 0.7559030069969594} +12/29/2021 00:33:01 - INFO - codeparrot_training - Step 46016: {'lr': 7.846371379073542e-06, 'samples': 23560704, 'steps': 46016, 'batch_loss/train': 0.6916403230279684} +12/29/2021 00:33:12 - INFO - codeparrot_training - Step 46017: {'lr': 7.842453653748527e-06, 'samples': 23561216, 'steps': 46017, 'batch_loss/train': 0.806477876380086} +12/29/2021 00:33:24 - INFO - codeparrot_training - Step 46018: {'lr': 7.838536891150438e-06, 'samples': 23561728, 'steps': 46018, 'batch_loss/train': 0.727889786940068} +12/29/2021 00:33:35 - INFO - codeparrot_training - Step 46019: {'lr': 7.83462109129493e-06, 'samples': 23562240, 'steps': 46019, 'batch_loss/train': 0.5840343958698213} +12/29/2021 00:33:46 - INFO - codeparrot_training - Step 46020: {'lr': 7.830706254197517e-06, 'samples': 23562752, 'steps': 46020, 'batch_loss/train': 0.7080973666161299} +12/29/2021 00:33:58 - INFO - codeparrot_training - Step 46021: {'lr': 7.826792379873742e-06, 'samples': 23563264, 'steps': 46021, 'batch_loss/train': 0.6264812012086622} +12/29/2021 00:34:08 - INFO - codeparrot_training - Step 46022: {'lr': 7.822879468339262e-06, 'samples': 23563776, 'steps': 46022, 'batch_loss/train': 0.7227882472798228} +12/29/2021 00:34:19 - INFO - codeparrot_training - Step 46023: {'lr': 7.818967519609533e-06, 'samples': 23564288, 'steps': 46023, 'batch_loss/train': 0.7424850901588798} +12/29/2021 00:34:31 - INFO - codeparrot_training - Step 46024: {'lr': 7.815056533700127e-06, 'samples': 23564800, 'steps': 46024, 'batch_loss/train': 0.728226619772613} +12/29/2021 00:34:42 - INFO - codeparrot_training - Step 46025: {'lr': 7.811146510626643e-06, 'samples': 23565312, 'steps': 46025, 'batch_loss/train': 0.6641716263256967} +12/29/2021 00:34:52 - INFO - codeparrot_training - Step 46026: {'lr': 7.807237450404569e-06, 'samples': 23565824, 'steps': 46026, 'batch_loss/train': 0.7860753117129207} +12/29/2021 00:35:03 - INFO - codeparrot_training - Step 46027: {'lr': 7.803329353049476e-06, 'samples': 23566336, 'steps': 46027, 'batch_loss/train': 0.7194254044443369} +12/29/2021 00:35:17 - INFO - codeparrot_training - Step 46028: {'lr': 7.799422218576935e-06, 'samples': 23566848, 'steps': 46028, 'batch_loss/train': 0.6905865308362991} +12/29/2021 00:35:28 - INFO - codeparrot_training - Step 46029: {'lr': 7.795516047002377e-06, 'samples': 23567360, 'steps': 46029, 'batch_loss/train': 0.8332540011033416} +12/29/2021 00:35:38 - INFO - codeparrot_training - Step 46030: {'lr': 7.791610838341428e-06, 'samples': 23567872, 'steps': 46030, 'batch_loss/train': 0.6893685760442168} +12/29/2021 00:35:50 - INFO - codeparrot_training - Step 46031: {'lr': 7.787706592609606e-06, 'samples': 23568384, 'steps': 46031, 'batch_loss/train': 0.8097557974979281} +12/29/2021 00:36:01 - INFO - codeparrot_training - Step 46032: {'lr': 7.783803309822368e-06, 'samples': 23568896, 'steps': 46032, 'batch_loss/train': 0.7750787164550275} +12/29/2021 00:36:12 - INFO - codeparrot_training - Step 46033: {'lr': 7.77990098999526e-06, 'samples': 23569408, 'steps': 46033, 'batch_loss/train': 0.7891280758194625} +12/29/2021 00:36:24 - INFO - codeparrot_training - Step 46034: {'lr': 7.775999633143876e-06, 'samples': 23569920, 'steps': 46034, 'batch_loss/train': 0.669242728035897} +12/29/2021 00:36:34 - INFO - codeparrot_training - Step 46035: {'lr': 7.772099239283598e-06, 'samples': 23570432, 'steps': 46035, 'batch_loss/train': 0.7903364300727844} +12/29/2021 00:36:45 - INFO - codeparrot_training - Step 46036: {'lr': 7.768199808429993e-06, 'samples': 23570944, 'steps': 46036, 'batch_loss/train': 0.6997565279598348} +12/29/2021 00:36:56 - INFO - codeparrot_training - Step 46037: {'lr': 7.76430134059858e-06, 'samples': 23571456, 'steps': 46037, 'batch_loss/train': 0.6152092239353806} +12/29/2021 00:37:10 - INFO - codeparrot_training - Step 46038: {'lr': 7.76040383580484e-06, 'samples': 23571968, 'steps': 46038, 'batch_loss/train': 0.7341688182204962} +12/29/2021 00:37:20 - INFO - codeparrot_training - Step 46039: {'lr': 7.75650729406424e-06, 'samples': 23572480, 'steps': 46039, 'batch_loss/train': 0.7829398917965591} +12/29/2021 00:37:31 - INFO - codeparrot_training - Step 46040: {'lr': 7.752611715392322e-06, 'samples': 23572992, 'steps': 46040, 'batch_loss/train': 0.7738297744654119} +12/29/2021 00:37:43 - INFO - codeparrot_training - Step 46041: {'lr': 7.748717099804543e-06, 'samples': 23573504, 'steps': 46041, 'batch_loss/train': 0.6800192436203361} +12/29/2021 00:37:54 - INFO - codeparrot_training - Step 46042: {'lr': 7.744823447316418e-06, 'samples': 23574016, 'steps': 46042, 'batch_loss/train': 0.7075393190607429} +12/29/2021 00:38:04 - INFO - codeparrot_training - Step 46043: {'lr': 7.74093075794341e-06, 'samples': 23574528, 'steps': 46043, 'batch_loss/train': 0.7804581262171268} +12/29/2021 00:38:18 - INFO - codeparrot_training - Step 46044: {'lr': 7.73703903170092e-06, 'samples': 23575040, 'steps': 46044, 'batch_loss/train': 0.8012520764023066} +12/29/2021 00:38:29 - INFO - codeparrot_training - Step 46045: {'lr': 7.733148268604578e-06, 'samples': 23575552, 'steps': 46045, 'batch_loss/train': 0.7792073683813214} +12/29/2021 00:38:39 - INFO - codeparrot_training - Step 46046: {'lr': 7.729258468669704e-06, 'samples': 23576064, 'steps': 46046, 'batch_loss/train': 0.6830652924254537} +12/29/2021 00:38:50 - INFO - codeparrot_training - Step 46047: {'lr': 7.725369631911811e-06, 'samples': 23576576, 'steps': 46047, 'batch_loss/train': 0.8283684030175209} +12/29/2021 00:39:02 - INFO - codeparrot_training - Step 46048: {'lr': 7.721481758346416e-06, 'samples': 23577088, 'steps': 46048, 'batch_loss/train': 0.7434368960093707} +12/29/2021 00:39:13 - INFO - codeparrot_training - Step 46049: {'lr': 7.717594847988896e-06, 'samples': 23577600, 'steps': 46049, 'batch_loss/train': 0.6952446182258427} +12/29/2021 00:39:23 - INFO - codeparrot_training - Step 46050: {'lr': 7.713708900854738e-06, 'samples': 23578112, 'steps': 46050, 'batch_loss/train': 0.6898423626553267} +12/29/2021 00:39:35 - INFO - codeparrot_training - Step 46051: {'lr': 7.709823916959401e-06, 'samples': 23578624, 'steps': 46051, 'batch_loss/train': 0.6357289962470531} +12/29/2021 00:39:46 - INFO - codeparrot_training - Step 46052: {'lr': 7.705939896318293e-06, 'samples': 23579136, 'steps': 46052, 'batch_loss/train': 0.7413702975027263} +12/29/2021 00:39:57 - INFO - codeparrot_training - Step 46053: {'lr': 7.702056838946896e-06, 'samples': 23579648, 'steps': 46053, 'batch_loss/train': 0.7348511829040945} +12/29/2021 00:40:11 - INFO - codeparrot_training - Step 46054: {'lr': 7.698174744860675e-06, 'samples': 23580160, 'steps': 46054, 'batch_loss/train': 0.603451712639071} +12/29/2021 00:40:21 - INFO - codeparrot_training - Step 46055: {'lr': 7.694293614074949e-06, 'samples': 23580672, 'steps': 46055, 'batch_loss/train': 0.7958884132094681} +12/29/2021 00:40:32 - INFO - codeparrot_training - Step 46056: {'lr': 7.69041344660526e-06, 'samples': 23581184, 'steps': 46056, 'batch_loss/train': 0.6693559210980311} +12/29/2021 00:40:44 - INFO - codeparrot_training - Step 46057: {'lr': 7.686534242467015e-06, 'samples': 23581696, 'steps': 46057, 'batch_loss/train': 0.7658480606041849} +12/29/2021 00:40:55 - INFO - codeparrot_training - Step 46058: {'lr': 7.682656001675558e-06, 'samples': 23582208, 'steps': 46058, 'batch_loss/train': 0.770905239507556} +12/29/2021 00:41:05 - INFO - codeparrot_training - Step 46059: {'lr': 7.67877872424641e-06, 'samples': 23582720, 'steps': 46059, 'batch_loss/train': 0.7600222756154835} +12/29/2021 00:41:16 - INFO - codeparrot_training - Step 46060: {'lr': 7.674902410194945e-06, 'samples': 23583232, 'steps': 46060, 'batch_loss/train': 0.877672332979273} +12/29/2021 00:41:28 - INFO - codeparrot_training - Step 46061: {'lr': 7.671027059536567e-06, 'samples': 23583744, 'steps': 46061, 'batch_loss/train': 0.7040692130103707} +12/29/2021 00:41:39 - INFO - codeparrot_training - Step 46062: {'lr': 7.667152672286625e-06, 'samples': 23584256, 'steps': 46062, 'batch_loss/train': 0.7084497702307999} +12/29/2021 00:41:49 - INFO - codeparrot_training - Step 46063: {'lr': 7.663279248460663e-06, 'samples': 23584768, 'steps': 46063, 'batch_loss/train': 0.8476134929805994} +12/29/2021 00:42:04 - INFO - codeparrot_training - Step 46064: {'lr': 7.659406788073975e-06, 'samples': 23585280, 'steps': 46064, 'batch_loss/train': 0.8810689486563206} +12/29/2021 00:42:14 - INFO - codeparrot_training - Step 46065: {'lr': 7.655535291141964e-06, 'samples': 23585792, 'steps': 46065, 'batch_loss/train': 0.6957886125892401} +12/29/2021 00:42:25 - INFO - codeparrot_training - Step 46066: {'lr': 7.65166475768006e-06, 'samples': 23586304, 'steps': 46066, 'batch_loss/train': 0.7815111763775349} +12/29/2021 00:42:36 - INFO - codeparrot_training - Step 46067: {'lr': 7.647795187703615e-06, 'samples': 23586816, 'steps': 46067, 'batch_loss/train': 0.8178627341985703} +12/29/2021 00:42:48 - INFO - codeparrot_training - Step 46068: {'lr': 7.643926581228006e-06, 'samples': 23587328, 'steps': 46068, 'batch_loss/train': 0.6731561560882255} +12/29/2021 00:42:58 - INFO - codeparrot_training - Step 46069: {'lr': 7.640058938268662e-06, 'samples': 23587840, 'steps': 46069, 'batch_loss/train': 0.7260149461217225} +12/29/2021 00:43:09 - INFO - codeparrot_training - Step 46070: {'lr': 7.636192258840935e-06, 'samples': 23588352, 'steps': 46070, 'batch_loss/train': 0.6408910490572453} +12/29/2021 00:43:21 - INFO - codeparrot_training - Step 46071: {'lr': 7.632326542960228e-06, 'samples': 23588864, 'steps': 46071, 'batch_loss/train': 0.74715765286237} +12/29/2021 00:43:32 - INFO - codeparrot_training - Step 46072: {'lr': 7.628461790641833e-06, 'samples': 23589376, 'steps': 46072, 'batch_loss/train': 0.7774887047708035} +12/29/2021 00:43:42 - INFO - codeparrot_training - Step 46073: {'lr': 7.624598001901128e-06, 'samples': 23589888, 'steps': 46073, 'batch_loss/train': 0.8510266400408} +12/29/2021 00:43:56 - INFO - codeparrot_training - Step 46074: {'lr': 7.620735176753574e-06, 'samples': 23590400, 'steps': 46074, 'batch_loss/train': 0.7470411881804466} +12/29/2021 00:44:07 - INFO - codeparrot_training - Step 46075: {'lr': 7.616873315214407e-06, 'samples': 23590912, 'steps': 46075, 'batch_loss/train': 0.652649667696096} +12/29/2021 00:44:17 - INFO - codeparrot_training - Step 46076: {'lr': 7.613012417299059e-06, 'samples': 23591424, 'steps': 46076, 'batch_loss/train': 0.6487501277588308} +12/29/2021 00:44:30 - INFO - codeparrot_training - Step 46077: {'lr': 7.609152483022825e-06, 'samples': 23591936, 'steps': 46077, 'batch_loss/train': 0.776784653775394} +12/29/2021 00:44:40 - INFO - codeparrot_training - Step 46078: {'lr': 7.6052935124011065e-06, 'samples': 23592448, 'steps': 46078, 'batch_loss/train': 1.2857203278690577} +12/29/2021 00:44:51 - INFO - codeparrot_training - Step 46079: {'lr': 7.6014355054492e-06, 'samples': 23592960, 'steps': 46079, 'batch_loss/train': 0.9595059405546635} +12/29/2021 00:45:02 - INFO - codeparrot_training - Step 46080: {'lr': 7.5975784621824805e-06, 'samples': 23593472, 'steps': 46080, 'batch_loss/train': 0.8367793486686423} +12/29/2021 00:45:14 - INFO - codeparrot_training - Step 46081: {'lr': 7.593722382616214e-06, 'samples': 23593984, 'steps': 46081, 'batch_loss/train': 0.4223478354688268} +12/29/2021 00:45:25 - INFO - codeparrot_training - Step 46082: {'lr': 7.589867266765804e-06, 'samples': 23594496, 'steps': 46082, 'batch_loss/train': 0.7643743762746453} +12/29/2021 00:45:35 - INFO - codeparrot_training - Step 46083: {'lr': 7.586013114646573e-06, 'samples': 23595008, 'steps': 46083, 'batch_loss/train': 0.7643522620201111} +12/29/2021 00:45:49 - INFO - codeparrot_training - Step 46084: {'lr': 7.582159926273785e-06, 'samples': 23595520, 'steps': 46084, 'batch_loss/train': 0.7487380364909768} +12/29/2021 00:46:00 - INFO - codeparrot_training - Step 46085: {'lr': 7.5783077016628175e-06, 'samples': 23596032, 'steps': 46085, 'batch_loss/train': 0.7736040120944381} +12/29/2021 00:46:11 - INFO - codeparrot_training - Step 46086: {'lr': 7.574456440828964e-06, 'samples': 23596544, 'steps': 46086, 'batch_loss/train': 0.752053695556242} +12/29/2021 00:46:23 - INFO - codeparrot_training - Step 46087: {'lr': 7.570606143787545e-06, 'samples': 23597056, 'steps': 46087, 'batch_loss/train': 0.7574810807127506} +12/29/2021 00:46:34 - INFO - codeparrot_training - Step 46088: {'lr': 7.5667568105537986e-06, 'samples': 23597568, 'steps': 46088, 'batch_loss/train': 0.7156584984622896} +12/29/2021 00:46:44 - INFO - codeparrot_training - Step 46089: {'lr': 7.562908441143185e-06, 'samples': 23598080, 'steps': 46089, 'batch_loss/train': 0.7088486095890403} +12/29/2021 00:46:55 - INFO - codeparrot_training - Step 46090: {'lr': 7.55906103557083e-06, 'samples': 23598592, 'steps': 46090, 'batch_loss/train': 0.7175275944173336} +12/29/2021 00:47:07 - INFO - codeparrot_training - Step 46091: {'lr': 7.555214593852139e-06, 'samples': 23599104, 'steps': 46091, 'batch_loss/train': 0.7646175944246352} +12/29/2021 00:47:18 - INFO - codeparrot_training - Step 46092: {'lr': 7.551369116002376e-06, 'samples': 23599616, 'steps': 46092, 'batch_loss/train': 0.7613656925968826} +12/29/2021 00:47:28 - INFO - codeparrot_training - Step 46093: {'lr': 7.54752460203681e-06, 'samples': 23600128, 'steps': 46093, 'batch_loss/train': 0.9365900354459882} +12/29/2021 00:47:42 - INFO - codeparrot_training - Step 46094: {'lr': 7.54368105197073e-06, 'samples': 23600640, 'steps': 46094, 'batch_loss/train': 0.6302250448497944} +12/29/2021 00:47:53 - INFO - codeparrot_training - Step 46095: {'lr': 7.539838465819432e-06, 'samples': 23601152, 'steps': 46095, 'batch_loss/train': 0.6902771731838584} +12/29/2021 00:48:04 - INFO - codeparrot_training - Step 46096: {'lr': 7.5359968435982086e-06, 'samples': 23601664, 'steps': 46096, 'batch_loss/train': 0.7090713521465659} +12/29/2021 00:48:16 - INFO - codeparrot_training - Step 46097: {'lr': 7.53215618532227e-06, 'samples': 23602176, 'steps': 46097, 'batch_loss/train': 0.691749129910022} +12/29/2021 00:48:27 - INFO - codeparrot_training - Step 46098: {'lr': 7.5283164910069925e-06, 'samples': 23602688, 'steps': 46098, 'batch_loss/train': 0.6781820463947952} +12/29/2021 00:48:37 - INFO - codeparrot_training - Step 46099: {'lr': 7.524477760667503e-06, 'samples': 23603200, 'steps': 46099, 'batch_loss/train': 0.8011603783816099} +12/29/2021 00:48:48 - INFO - codeparrot_training - Step 46100: {'lr': 7.520639994319179e-06, 'samples': 23603712, 'steps': 46100, 'batch_loss/train': 0.710551664698869} +12/29/2021 00:49:02 - INFO - codeparrot_training - Step 46101: {'lr': 7.516803191977229e-06, 'samples': 23604224, 'steps': 46101, 'batch_loss/train': 0.642996154492721} +12/29/2021 00:49:12 - INFO - codeparrot_training - Step 46102: {'lr': 7.512967353656891e-06, 'samples': 23604736, 'steps': 46102, 'batch_loss/train': 0.7298121824860573} +12/29/2021 00:49:23 - INFO - codeparrot_training - Step 46103: {'lr': 7.50913247937346e-06, 'samples': 23605248, 'steps': 46103, 'batch_loss/train': 1.1661484638461843} +12/29/2021 00:49:35 - INFO - codeparrot_training - Step 46104: {'lr': 7.505298569142144e-06, 'samples': 23605760, 'steps': 46104, 'batch_loss/train': 0.7100855065509677} +12/29/2021 00:49:46 - INFO - codeparrot_training - Step 46105: {'lr': 7.501465622978182e-06, 'samples': 23606272, 'steps': 46105, 'batch_loss/train': 0.9259011810645461} +12/29/2021 00:49:56 - INFO - codeparrot_training - Step 46106: {'lr': 7.497633640896867e-06, 'samples': 23606784, 'steps': 46106, 'batch_loss/train': 0.7463904283358715} +12/29/2021 00:50:09 - INFO - codeparrot_training - Step 46107: {'lr': 7.493802622913354e-06, 'samples': 23607296, 'steps': 46107, 'batch_loss/train': 0.7572878126520663} +12/29/2021 00:50:19 - INFO - codeparrot_training - Step 46108: {'lr': 7.4899725690429355e-06, 'samples': 23607808, 'steps': 46108, 'batch_loss/train': 0.6488332080189139} +12/29/2021 00:50:30 - INFO - codeparrot_training - Step 46109: {'lr': 7.48614347930085e-06, 'samples': 23608320, 'steps': 46109, 'batch_loss/train': 0.5184781433781609} +12/29/2021 00:50:41 - INFO - codeparrot_training - Step 46110: {'lr': 7.482315353702224e-06, 'samples': 23608832, 'steps': 46110, 'batch_loss/train': 0.5006965766660869} +12/29/2021 00:50:55 - INFO - codeparrot_training - Step 46111: {'lr': 7.478488192262379e-06, 'samples': 23609344, 'steps': 46111, 'batch_loss/train': 0.646544421557337} +12/29/2021 00:51:05 - INFO - codeparrot_training - Step 46112: {'lr': 7.474661994996496e-06, 'samples': 23609856, 'steps': 46112, 'batch_loss/train': 0.6832068683579564} +12/29/2021 00:51:16 - INFO - codeparrot_training - Step 46113: {'lr': 7.470836761919786e-06, 'samples': 23610368, 'steps': 46113, 'batch_loss/train': 0.712757283821702} +12/29/2021 00:51:28 - INFO - codeparrot_training - Step 46114: {'lr': 7.4670124930474035e-06, 'samples': 23610880, 'steps': 46114, 'batch_loss/train': 0.7885854300111532} +12/29/2021 00:51:39 - INFO - codeparrot_training - Step 46115: {'lr': 7.46318918839467e-06, 'samples': 23611392, 'steps': 46115, 'batch_loss/train': 0.7425829991698265} +12/29/2021 00:51:49 - INFO - codeparrot_training - Step 46116: {'lr': 7.459366847976684e-06, 'samples': 23611904, 'steps': 46116, 'batch_loss/train': 0.7382542649284005} +12/29/2021 00:52:04 - INFO - codeparrot_training - Step 46117: {'lr': 7.455545471808684e-06, 'samples': 23612416, 'steps': 46117, 'batch_loss/train': 0.7302944269031286} +12/29/2021 00:52:14 - INFO - codeparrot_training - Step 46118: {'lr': 7.451725059905851e-06, 'samples': 23612928, 'steps': 46118, 'batch_loss/train': 0.5684101199731231} +12/29/2021 00:52:25 - INFO - codeparrot_training - Step 46119: {'lr': 7.447905612283368e-06, 'samples': 23613440, 'steps': 46119, 'batch_loss/train': 0.757981551811099} +12/29/2021 00:52:35 - INFO - codeparrot_training - Step 46120: {'lr': 7.444087128956417e-06, 'samples': 23613952, 'steps': 46120, 'batch_loss/train': 0.7379559352993965} +12/29/2021 00:52:48 - INFO - codeparrot_training - Step 46121: {'lr': 7.440269609940209e-06, 'samples': 23614464, 'steps': 46121, 'batch_loss/train': 0.6861645546741784} +12/29/2021 00:52:58 - INFO - codeparrot_training - Step 46122: {'lr': 7.436453055249898e-06, 'samples': 23614976, 'steps': 46122, 'batch_loss/train': 0.7051184894517064} +12/29/2021 00:53:09 - INFO - codeparrot_training - Step 46123: {'lr': 7.4326374649006655e-06, 'samples': 23615488, 'steps': 46123, 'batch_loss/train': 0.7543215677142143} +12/29/2021 00:53:21 - INFO - codeparrot_training - Step 46124: {'lr': 7.428822838907695e-06, 'samples': 23616000, 'steps': 46124, 'batch_loss/train': 0.699434113688767} +12/29/2021 00:53:32 - INFO - codeparrot_training - Step 46125: {'lr': 7.425009177286085e-06, 'samples': 23616512, 'steps': 46125, 'batch_loss/train': 0.7182745207101107} +12/29/2021 00:53:42 - INFO - codeparrot_training - Step 46126: {'lr': 7.421196480051101e-06, 'samples': 23617024, 'steps': 46126, 'batch_loss/train': 0.7635036413557827} +12/29/2021 00:53:56 - INFO - codeparrot_training - Step 46127: {'lr': 7.417384747217815e-06, 'samples': 23617536, 'steps': 46127, 'batch_loss/train': 0.7291857525706291} +12/29/2021 00:54:07 - INFO - codeparrot_training - Step 46128: {'lr': 7.413573978801435e-06, 'samples': 23618048, 'steps': 46128, 'batch_loss/train': 0.7528557204641402} +12/29/2021 00:54:17 - INFO - codeparrot_training - Step 46129: {'lr': 7.409764174817063e-06, 'samples': 23618560, 'steps': 46129, 'batch_loss/train': 0.7379535529762506} +12/29/2021 00:54:30 - INFO - codeparrot_training - Step 46130: {'lr': 7.405955335279879e-06, 'samples': 23619072, 'steps': 46130, 'batch_loss/train': 0.7018925151787698} +12/29/2021 00:54:40 - INFO - codeparrot_training - Step 46131: {'lr': 7.402147460205011e-06, 'samples': 23619584, 'steps': 46131, 'batch_loss/train': 0.7635609852150083} +12/29/2021 00:54:51 - INFO - codeparrot_training - Step 46132: {'lr': 7.398340549607585e-06, 'samples': 23620096, 'steps': 46132, 'batch_loss/train': 0.747687438968569} +12/29/2021 00:55:02 - INFO - codeparrot_training - Step 46133: {'lr': 7.394534603502784e-06, 'samples': 23620608, 'steps': 46133, 'batch_loss/train': 0.7271363468607888} +12/29/2021 00:55:14 - INFO - codeparrot_training - Step 46134: {'lr': 7.390729621905706e-06, 'samples': 23621120, 'steps': 46134, 'batch_loss/train': 0.6930834539234638} +12/29/2021 00:55:24 - INFO - codeparrot_training - Step 46135: {'lr': 7.386925604831479e-06, 'samples': 23621632, 'steps': 46135, 'batch_loss/train': 0.7331688974518329} +12/29/2021 00:55:35 - INFO - codeparrot_training - Step 46136: {'lr': 7.383122552295201e-06, 'samples': 23622144, 'steps': 46136, 'batch_loss/train': 0.8414242451544851} +12/29/2021 00:55:49 - INFO - codeparrot_training - Step 46137: {'lr': 7.379320464312056e-06, 'samples': 23622656, 'steps': 46137, 'batch_loss/train': 0.6853368598967791} +12/29/2021 00:55:59 - INFO - codeparrot_training - Step 46138: {'lr': 7.375519340897113e-06, 'samples': 23623168, 'steps': 46138, 'batch_loss/train': 0.75620127748698} +12/29/2021 00:56:10 - INFO - codeparrot_training - Step 46139: {'lr': 7.371719182065501e-06, 'samples': 23623680, 'steps': 46139, 'batch_loss/train': 0.7006192323751748} +12/29/2021 00:56:22 - INFO - codeparrot_training - Step 46140: {'lr': 7.367919987832289e-06, 'samples': 23624192, 'steps': 46140, 'batch_loss/train': 0.7306457150261849} +12/29/2021 00:56:33 - INFO - codeparrot_training - Step 46141: {'lr': 7.364121758212661e-06, 'samples': 23624704, 'steps': 46141, 'batch_loss/train': 0.7505391398444772} +12/29/2021 00:56:43 - INFO - codeparrot_training - Step 46142: {'lr': 7.3603244932216316e-06, 'samples': 23625216, 'steps': 46142, 'batch_loss/train': 0.7397911106236279} +12/29/2021 00:56:54 - INFO - codeparrot_training - Step 46143: {'lr': 7.356528192874329e-06, 'samples': 23625728, 'steps': 46143, 'batch_loss/train': 0.848889296175912} +12/29/2021 00:57:08 - INFO - codeparrot_training - Step 46144: {'lr': 7.352732857185879e-06, 'samples': 23626240, 'steps': 46144, 'batch_loss/train': 0.660590874729678} +12/29/2021 00:57:19 - INFO - codeparrot_training - Step 46145: {'lr': 7.348938486171325e-06, 'samples': 23626752, 'steps': 46145, 'batch_loss/train': 0.7536396835930645} +12/29/2021 00:57:29 - INFO - codeparrot_training - Step 46146: {'lr': 7.3451450798457945e-06, 'samples': 23627264, 'steps': 46146, 'batch_loss/train': 0.6587454058462754} +12/29/2021 00:57:41 - INFO - codeparrot_training - Step 46147: {'lr': 7.341352638224302e-06, 'samples': 23627776, 'steps': 46147, 'batch_loss/train': 0.748751358827576} +12/29/2021 00:57:52 - INFO - codeparrot_training - Step 46148: {'lr': 7.337561161322004e-06, 'samples': 23628288, 'steps': 46148, 'batch_loss/train': 0.78042056504637} +12/29/2021 00:58:03 - INFO - codeparrot_training - Step 46149: {'lr': 7.333770649153915e-06, 'samples': 23628800, 'steps': 46149, 'batch_loss/train': 0.7203565633390099} +12/29/2021 00:58:15 - INFO - codeparrot_training - Step 46150: {'lr': 7.329981101735161e-06, 'samples': 23629312, 'steps': 46150, 'batch_loss/train': 0.82735530519858} +12/29/2021 00:58:26 - INFO - codeparrot_training - Step 46151: {'lr': 7.326192519080732e-06, 'samples': 23629824, 'steps': 46151, 'batch_loss/train': 0.7470391783863306} +12/29/2021 00:58:36 - INFO - codeparrot_training - Step 46152: {'lr': 7.322404901205726e-06, 'samples': 23630336, 'steps': 46152, 'batch_loss/train': 0.9156142910942435} +12/29/2021 00:58:47 - INFO - codeparrot_training - Step 46153: {'lr': 7.318618248125269e-06, 'samples': 23630848, 'steps': 46153, 'batch_loss/train': 0.9072473386768252} +12/29/2021 00:58:59 - INFO - codeparrot_training - Step 46154: {'lr': 7.314832559854323e-06, 'samples': 23631360, 'steps': 46154, 'batch_loss/train': 0.7357209185138345} +12/29/2021 00:59:10 - INFO - codeparrot_training - Step 46155: {'lr': 7.3110478364079305e-06, 'samples': 23631872, 'steps': 46155, 'batch_loss/train': 0.6507665608078241} +12/29/2021 00:59:20 - INFO - codeparrot_training - Step 46156: {'lr': 7.307264077801218e-06, 'samples': 23632384, 'steps': 46156, 'batch_loss/train': 0.7370003708638251} +12/29/2021 00:59:34 - INFO - codeparrot_training - Step 46157: {'lr': 7.3034812840491736e-06, 'samples': 23632896, 'steps': 46157, 'batch_loss/train': 0.5970797234913334} +12/29/2021 00:59:45 - INFO - codeparrot_training - Step 46158: {'lr': 7.299699455166842e-06, 'samples': 23633408, 'steps': 46158, 'batch_loss/train': 0.762351471115835} +12/29/2021 00:59:56 - INFO - codeparrot_training - Step 46159: {'lr': 7.295918591169265e-06, 'samples': 23633920, 'steps': 46159, 'batch_loss/train': 0.795914996881038} +12/29/2021 01:00:08 - INFO - codeparrot_training - Step 46160: {'lr': 7.292138692071487e-06, 'samples': 23634432, 'steps': 46160, 'batch_loss/train': 0.8374386923387647} +12/29/2021 01:00:19 - INFO - codeparrot_training - Step 46161: {'lr': 7.288359757888524e-06, 'samples': 23634944, 'steps': 46161, 'batch_loss/train': 0.7433890444226563} +12/29/2021 01:00:29 - INFO - codeparrot_training - Step 46162: {'lr': 7.284581788635364e-06, 'samples': 23635456, 'steps': 46162, 'batch_loss/train': 0.7236845414154232} +12/29/2021 01:00:40 - INFO - codeparrot_training - Step 46163: {'lr': 7.280804784327105e-06, 'samples': 23635968, 'steps': 46163, 'batch_loss/train': 0.6676226185518317} +12/29/2021 01:00:52 - INFO - codeparrot_training - Step 46164: {'lr': 7.2770287449787085e-06, 'samples': 23636480, 'steps': 46164, 'batch_loss/train': 0.7470121532678604} +12/29/2021 01:01:03 - INFO - codeparrot_training - Step 46165: {'lr': 7.27325367060519e-06, 'samples': 23636992, 'steps': 46165, 'batch_loss/train': 0.724197366158478} +12/29/2021 01:01:13 - INFO - codeparrot_training - Step 46166: {'lr': 7.269479561221537e-06, 'samples': 23637504, 'steps': 46166, 'batch_loss/train': 0.7229508580639958} +12/29/2021 01:01:27 - INFO - codeparrot_training - Step 46167: {'lr': 7.265706416842849e-06, 'samples': 23638016, 'steps': 46167, 'batch_loss/train': 0.7909890897572041} +12/29/2021 01:01:38 - INFO - codeparrot_training - Step 46168: {'lr': 7.2619342374840025e-06, 'samples': 23638528, 'steps': 46168, 'batch_loss/train': 0.705960646038875} +12/29/2021 01:01:48 - INFO - codeparrot_training - Step 46169: {'lr': 7.258163023160041e-06, 'samples': 23639040, 'steps': 46169, 'batch_loss/train': 0.7532181488350034} +12/29/2021 01:02:00 - INFO - codeparrot_training - Step 46170: {'lr': 7.2543927738860095e-06, 'samples': 23639552, 'steps': 46170, 'batch_loss/train': 0.7164127486757934} +12/29/2021 01:02:11 - INFO - codeparrot_training - Step 46171: {'lr': 7.250623489676838e-06, 'samples': 23640064, 'steps': 46171, 'batch_loss/train': 0.734382092487067} +12/29/2021 01:02:22 - INFO - codeparrot_training - Step 46172: {'lr': 7.246855170547545e-06, 'samples': 23640576, 'steps': 46172, 'batch_loss/train': 0.7484850212931633} +12/29/2021 01:02:35 - INFO - codeparrot_training - Step 46173: {'lr': 7.2430878165130885e-06, 'samples': 23641088, 'steps': 46173, 'batch_loss/train': 0.7366828750818968} +12/29/2021 01:02:46 - INFO - codeparrot_training - Step 46174: {'lr': 7.23932142758843e-06, 'samples': 23641600, 'steps': 46174, 'batch_loss/train': 0.7531184526160359} +12/29/2021 01:02:57 - INFO - codeparrot_training - Step 46175: {'lr': 7.235556003788585e-06, 'samples': 23642112, 'steps': 46175, 'batch_loss/train': 0.7868189113214612} +12/29/2021 01:03:07 - INFO - codeparrot_training - Step 46176: {'lr': 7.231791545128541e-06, 'samples': 23642624, 'steps': 46176, 'batch_loss/train': 0.770965036470443} +12/29/2021 01:03:19 - INFO - codeparrot_training - Step 46177: {'lr': 7.228028051623148e-06, 'samples': 23643136, 'steps': 46177, 'batch_loss/train': 0.782727780751884} +12/29/2021 01:03:30 - INFO - codeparrot_training - Step 46178: {'lr': 7.224265523287504e-06, 'samples': 23643648, 'steps': 46178, 'batch_loss/train': 0.701204986544326} +12/29/2021 01:03:40 - INFO - codeparrot_training - Step 46179: {'lr': 7.220503960136515e-06, 'samples': 23644160, 'steps': 46179, 'batch_loss/train': 0.7088749501854181} +12/29/2021 01:03:53 - INFO - codeparrot_training - Step 46180: {'lr': 7.216743362185085e-06, 'samples': 23644672, 'steps': 46180, 'batch_loss/train': 0.7237128049600869} +12/29/2021 01:04:03 - INFO - codeparrot_training - Step 46181: {'lr': 7.212983729448258e-06, 'samples': 23645184, 'steps': 46181, 'batch_loss/train': 0.8180809828918427} +12/29/2021 01:04:14 - INFO - codeparrot_training - Step 46182: {'lr': 7.209225061940938e-06, 'samples': 23645696, 'steps': 46182, 'batch_loss/train': 0.754977002274245} +12/29/2021 01:04:26 - INFO - codeparrot_training - Step 46183: {'lr': 7.205467359678031e-06, 'samples': 23646208, 'steps': 46183, 'batch_loss/train': 0.6070831613615155} +12/29/2021 01:04:37 - INFO - codeparrot_training - Step 46184: {'lr': 7.2017106226745235e-06, 'samples': 23646720, 'steps': 46184, 'batch_loss/train': 0.7821896076202393} +12/29/2021 01:04:48 - INFO - codeparrot_training - Step 46185: {'lr': 7.197954850945349e-06, 'samples': 23647232, 'steps': 46185, 'batch_loss/train': 1.0508014438673854} +12/29/2021 01:04:58 - INFO - codeparrot_training - Step 46186: {'lr': 7.194200044505411e-06, 'samples': 23647744, 'steps': 46186, 'batch_loss/train': 0.7455510310828686} +12/29/2021 01:05:12 - INFO - codeparrot_training - Step 46187: {'lr': 7.190446203369671e-06, 'samples': 23648256, 'steps': 46187, 'batch_loss/train': 0.7275038491934538} +12/29/2021 01:05:23 - INFO - codeparrot_training - Step 46188: {'lr': 7.186693327553007e-06, 'samples': 23648768, 'steps': 46188, 'batch_loss/train': 0.7293491405434906} +12/29/2021 01:05:34 - INFO - codeparrot_training - Step 46189: {'lr': 7.182941417070405e-06, 'samples': 23649280, 'steps': 46189, 'batch_loss/train': 0.7273933060932904} +12/29/2021 01:05:46 - INFO - codeparrot_training - Step 46190: {'lr': 7.179190471936742e-06, 'samples': 23649792, 'steps': 46190, 'batch_loss/train': 0.6605210350826383} +12/29/2021 01:05:56 - INFO - codeparrot_training - Step 46191: {'lr': 7.175440492166896e-06, 'samples': 23650304, 'steps': 46191, 'batch_loss/train': 0.6910445131361485} +12/29/2021 01:06:07 - INFO - codeparrot_training - Step 46192: {'lr': 7.171691477775827e-06, 'samples': 23650816, 'steps': 46192, 'batch_loss/train': 0.8196679381653666} +12/29/2021 01:06:20 - INFO - codeparrot_training - Step 46193: {'lr': 7.167943428778439e-06, 'samples': 23651328, 'steps': 46193, 'batch_loss/train': 0.7217513830401003} +12/29/2021 01:06:30 - INFO - codeparrot_training - Step 46194: {'lr': 7.164196345189611e-06, 'samples': 23651840, 'steps': 46194, 'batch_loss/train': 1.0611712979152799} +12/29/2021 01:06:41 - INFO - codeparrot_training - Step 46195: {'lr': 7.160450227024218e-06, 'samples': 23652352, 'steps': 46195, 'batch_loss/train': 0.7096968041732907} +12/29/2021 01:06:51 - INFO - codeparrot_training - Step 46196: {'lr': 7.1567050742972205e-06, 'samples': 23652864, 'steps': 46196, 'batch_loss/train': 0.6942191594280303} +12/29/2021 01:07:05 - INFO - codeparrot_training - Step 46197: {'lr': 7.152960887023469e-06, 'samples': 23653376, 'steps': 46197, 'batch_loss/train': 0.7020041110226884} +12/29/2021 01:07:16 - INFO - codeparrot_training - Step 46198: {'lr': 7.149217665217838e-06, 'samples': 23653888, 'steps': 46198, 'batch_loss/train': 0.7217406018171459} +12/29/2021 01:07:27 - INFO - codeparrot_training - Step 46199: {'lr': 7.145475408895208e-06, 'samples': 23654400, 'steps': 46199, 'batch_loss/train': 0.6596042616292834} +12/29/2021 01:07:39 - INFO - codeparrot_training - Step 46200: {'lr': 7.141734118070481e-06, 'samples': 23654912, 'steps': 46200, 'batch_loss/train': 0.7254732502624393} +12/29/2021 01:07:49 - INFO - codeparrot_training - Step 46201: {'lr': 7.1379937927585074e-06, 'samples': 23655424, 'steps': 46201, 'batch_loss/train': 0.7648061108775437} +12/29/2021 01:08:00 - INFO - codeparrot_training - Step 46202: {'lr': 7.134254432974219e-06, 'samples': 23655936, 'steps': 46202, 'batch_loss/train': 0.7222114419564605} +12/29/2021 01:08:14 - INFO - codeparrot_training - Step 46203: {'lr': 7.1305160387323545e-06, 'samples': 23656448, 'steps': 46203, 'batch_loss/train': 0.7459703786298633} +12/29/2021 01:08:25 - INFO - codeparrot_training - Step 46204: {'lr': 7.126778610047902e-06, 'samples': 23656960, 'steps': 46204, 'batch_loss/train': 0.7434641337022185} +12/29/2021 01:08:35 - INFO - codeparrot_training - Step 46205: {'lr': 7.123042146935682e-06, 'samples': 23657472, 'steps': 46205, 'batch_loss/train': 0.652054303791374} +12/29/2021 01:08:46 - INFO - codeparrot_training - Step 46206: {'lr': 7.11930664941049e-06, 'samples': 23657984, 'steps': 46206, 'batch_loss/train': 0.6566412427928299} +12/29/2021 01:08:58 - INFO - codeparrot_training - Step 46207: {'lr': 7.115572117487284e-06, 'samples': 23658496, 'steps': 46207, 'batch_loss/train': 0.6843708814121783} +12/29/2021 01:09:09 - INFO - codeparrot_training - Step 46208: {'lr': 7.111838551180832e-06, 'samples': 23659008, 'steps': 46208, 'batch_loss/train': 0.6885039834305644} +12/29/2021 01:09:20 - INFO - codeparrot_training - Step 46209: {'lr': 7.108105950506011e-06, 'samples': 23659520, 'steps': 46209, 'batch_loss/train': 0.7184747685678303} +12/29/2021 01:09:32 - INFO - codeparrot_training - Step 46210: {'lr': 7.104374315477613e-06, 'samples': 23660032, 'steps': 46210, 'batch_loss/train': 0.6505536064505577} +12/29/2021 01:09:43 - INFO - codeparrot_training - Step 46211: {'lr': 7.100643646110544e-06, 'samples': 23660544, 'steps': 46211, 'batch_loss/train': 0.6821804558858275} +12/29/2021 01:09:53 - INFO - codeparrot_training - Step 46212: {'lr': 7.096913942419569e-06, 'samples': 23661056, 'steps': 46212, 'batch_loss/train': 0.8628662582486868} +12/29/2021 01:10:04 - INFO - codeparrot_training - Step 46213: {'lr': 7.093185204419567e-06, 'samples': 23661568, 'steps': 46213, 'batch_loss/train': 0.9145271927118301} +12/29/2021 01:10:18 - INFO - codeparrot_training - Step 46214: {'lr': 7.089457432125329e-06, 'samples': 23662080, 'steps': 46214, 'batch_loss/train': 0.8192629935219884} +12/29/2021 01:10:29 - INFO - codeparrot_training - Step 46215: {'lr': 7.085730625551706e-06, 'samples': 23662592, 'steps': 46215, 'batch_loss/train': 0.7325714505277574} +12/29/2021 01:10:39 - INFO - codeparrot_training - Step 46216: {'lr': 7.082004784713519e-06, 'samples': 23663104, 'steps': 46216, 'batch_loss/train': 0.6907761273905635} +12/29/2021 01:10:52 - INFO - codeparrot_training - Step 46217: {'lr': 7.078279909625507e-06, 'samples': 23663616, 'steps': 46217, 'batch_loss/train': 0.6892951841582544} +12/29/2021 01:11:02 - INFO - codeparrot_training - Step 46218: {'lr': 7.074556000302545e-06, 'samples': 23664128, 'steps': 46218, 'batch_loss/train': 0.6516545359045267} +12/29/2021 01:11:13 - INFO - codeparrot_training - Step 46219: {'lr': 7.070833056759457e-06, 'samples': 23664640, 'steps': 46219, 'batch_loss/train': 0.8861193731427193} +12/29/2021 01:11:25 - INFO - codeparrot_training - Step 46220: {'lr': 7.06711107901098e-06, 'samples': 23665152, 'steps': 46220, 'batch_loss/train': 0.6639505963539705} +12/29/2021 01:11:36 - INFO - codeparrot_training - Step 46221: {'lr': 7.063390067071934e-06, 'samples': 23665664, 'steps': 46221, 'batch_loss/train': 0.6913990564644337} +12/29/2021 01:11:46 - INFO - codeparrot_training - Step 46222: {'lr': 7.059670020957143e-06, 'samples': 23666176, 'steps': 46222, 'batch_loss/train': 0.6751316576264799} +12/29/2021 01:11:57 - INFO - codeparrot_training - Step 46223: {'lr': 7.055950940681371e-06, 'samples': 23666688, 'steps': 46223, 'batch_loss/train': 0.6868224389618263} +12/29/2021 01:12:09 - INFO - codeparrot_training - Step 46224: {'lr': 7.052232826259386e-06, 'samples': 23667200, 'steps': 46224, 'batch_loss/train': 0.7436306085437536} +12/29/2021 01:12:20 - INFO - codeparrot_training - Step 46225: {'lr': 7.048515677706007e-06, 'samples': 23667712, 'steps': 46225, 'batch_loss/train': 0.7315280737821013} +12/29/2021 01:12:30 - INFO - codeparrot_training - Step 46226: {'lr': 7.0447994950359754e-06, 'samples': 23668224, 'steps': 46226, 'batch_loss/train': 0.8529402269050479} +12/29/2021 01:12:44 - INFO - codeparrot_training - Step 46227: {'lr': 7.041084278264109e-06, 'samples': 23668736, 'steps': 46227, 'batch_loss/train': 0.8174975384026766} +12/29/2021 01:12:55 - INFO - codeparrot_training - Step 46228: {'lr': 7.037370027405177e-06, 'samples': 23669248, 'steps': 46228, 'batch_loss/train': 0.6979124409845099} +12/29/2021 01:13:05 - INFO - codeparrot_training - Step 46229: {'lr': 7.03365674247386e-06, 'samples': 23669760, 'steps': 46229, 'batch_loss/train': 0.6533850373816676} +12/29/2021 01:13:17 - INFO - codeparrot_training - Step 46230: {'lr': 7.029944423485008e-06, 'samples': 23670272, 'steps': 46230, 'batch_loss/train': 0.7280495380982757} +12/29/2021 01:13:28 - INFO - codeparrot_training - Step 46231: {'lr': 7.026233070453386e-06, 'samples': 23670784, 'steps': 46231, 'batch_loss/train': 0.7611243552528322} +12/29/2021 01:13:39 - INFO - codeparrot_training - Step 46232: {'lr': 7.02252268339365e-06, 'samples': 23671296, 'steps': 46232, 'batch_loss/train': 0.7896661479026079} +12/29/2021 01:13:49 - INFO - codeparrot_training - Step 46233: {'lr': 7.018813262320678e-06, 'samples': 23671808, 'steps': 46233, 'batch_loss/train': 0.8249867912381887} +12/29/2021 01:14:03 - INFO - codeparrot_training - Step 46234: {'lr': 7.015104807249151e-06, 'samples': 23672320, 'steps': 46234, 'batch_loss/train': 0.7757427417673171} +12/29/2021 01:14:13 - INFO - codeparrot_training - Step 46235: {'lr': 7.011397318193807e-06, 'samples': 23672832, 'steps': 46235, 'batch_loss/train': 0.7189789566909894} +12/29/2021 01:14:24 - INFO - codeparrot_training - Step 46236: {'lr': 7.007690795169414e-06, 'samples': 23673344, 'steps': 46236, 'batch_loss/train': 0.7633631490170956} +12/29/2021 01:14:36 - INFO - codeparrot_training - Step 46237: {'lr': 7.003985238190652e-06, 'samples': 23673856, 'steps': 46237, 'batch_loss/train': 0.8518268330954015} +12/29/2021 01:14:47 - INFO - codeparrot_training - Step 46238: {'lr': 7.000280647272344e-06, 'samples': 23674368, 'steps': 46238, 'batch_loss/train': 0.7896272549405694} +12/29/2021 01:14:57 - INFO - codeparrot_training - Step 46239: {'lr': 6.996577022429118e-06, 'samples': 23674880, 'steps': 46239, 'batch_loss/train': 0.6629347607376985} +12/29/2021 01:15:09 - INFO - codeparrot_training - Step 46240: {'lr': 6.992874363675794e-06, 'samples': 23675392, 'steps': 46240, 'batch_loss/train': 0.8324075324926525} +12/29/2021 01:15:20 - INFO - codeparrot_training - Step 46241: {'lr': 6.989172671027e-06, 'samples': 23675904, 'steps': 46241, 'batch_loss/train': 0.8199469139799476} +12/29/2021 01:15:31 - INFO - codeparrot_training - Step 46242: {'lr': 6.985471944497529e-06, 'samples': 23676416, 'steps': 46242, 'batch_loss/train': 0.8471215311437845} +12/29/2021 01:15:44 - INFO - codeparrot_training - Step 46243: {'lr': 6.981772184102065e-06, 'samples': 23676928, 'steps': 46243, 'batch_loss/train': 0.7469015109236352} +12/29/2021 01:15:55 - INFO - codeparrot_training - Step 46244: {'lr': 6.978073389855288e-06, 'samples': 23677440, 'steps': 46244, 'batch_loss/train': 0.7544717926066369} +12/29/2021 01:16:06 - INFO - codeparrot_training - Step 46245: {'lr': 6.9743755617719675e-06, 'samples': 23677952, 'steps': 46245, 'batch_loss/train': 0.6821549953892827} +12/29/2021 01:16:16 - INFO - codeparrot_training - Step 46246: {'lr': 6.970678699866756e-06, 'samples': 23678464, 'steps': 46246, 'batch_loss/train': 0.6624634435866028} +12/29/2021 01:16:29 - INFO - codeparrot_training - Step 46247: {'lr': 6.966982804154337e-06, 'samples': 23678976, 'steps': 46247, 'batch_loss/train': 0.8087197067216039} +12/29/2021 01:16:39 - INFO - codeparrot_training - Step 46248: {'lr': 6.963287874649449e-06, 'samples': 23679488, 'steps': 46248, 'batch_loss/train': 0.7994593791663647} +12/29/2021 01:16:50 - INFO - codeparrot_training - Step 46249: {'lr': 6.959593911366774e-06, 'samples': 23680000, 'steps': 46249, 'batch_loss/train': 0.7627374036237597} +12/29/2021 01:17:04 - INFO - codeparrot_training - Step 46250: {'lr': 6.955900914320939e-06, 'samples': 23680512, 'steps': 46250, 'batch_loss/train': 0.6668106794822961} +12/29/2021 01:17:14 - INFO - codeparrot_training - Step 46251: {'lr': 6.952208883526712e-06, 'samples': 23681024, 'steps': 46251, 'batch_loss/train': 0.7649040995165706} +12/29/2021 01:17:25 - INFO - codeparrot_training - Step 46252: {'lr': 6.948517818998718e-06, 'samples': 23681536, 'steps': 46252, 'batch_loss/train': 0.7078189766034484} +12/29/2021 01:17:37 - INFO - codeparrot_training - Step 46253: {'lr': 6.94482772075164e-06, 'samples': 23682048, 'steps': 46253, 'batch_loss/train': 0.7372397584840655} +12/29/2021 01:17:48 - INFO - codeparrot_training - Step 46254: {'lr': 6.941138588800161e-06, 'samples': 23682560, 'steps': 46254, 'batch_loss/train': 0.7934325593523681} +12/29/2021 01:17:58 - INFO - codeparrot_training - Step 46255: {'lr': 6.937450423158937e-06, 'samples': 23683072, 'steps': 46255, 'batch_loss/train': 0.7569562918506563} +12/29/2021 01:18:09 - INFO - codeparrot_training - Step 46256: {'lr': 6.9337632238426204e-06, 'samples': 23683584, 'steps': 46256, 'batch_loss/train': 0.6708130785264075} +12/29/2021 01:18:21 - INFO - codeparrot_training - Step 46257: {'lr': 6.930076990865925e-06, 'samples': 23684096, 'steps': 46257, 'batch_loss/train': 0.6708346595987678} +12/29/2021 01:18:32 - INFO - codeparrot_training - Step 46258: {'lr': 6.926391724243419e-06, 'samples': 23684608, 'steps': 46258, 'batch_loss/train': 0.7829539831727743} +12/29/2021 01:18:42 - INFO - codeparrot_training - Step 46259: {'lr': 6.922707423989816e-06, 'samples': 23685120, 'steps': 46259, 'batch_loss/train': 0.8095382917672396} +12/29/2021 01:18:56 - INFO - codeparrot_training - Step 46260: {'lr': 6.919024090119769e-06, 'samples': 23685632, 'steps': 46260, 'batch_loss/train': 0.7618274139240384} +12/29/2021 01:19:07 - INFO - codeparrot_training - Step 46261: {'lr': 6.91534172264785e-06, 'samples': 23686144, 'steps': 46261, 'batch_loss/train': 0.5225009059067816} +12/29/2021 01:19:17 - INFO - codeparrot_training - Step 46262: {'lr': 6.91166032158877e-06, 'samples': 23686656, 'steps': 46262, 'batch_loss/train': 0.6918770079500973} +12/29/2021 01:19:30 - INFO - codeparrot_training - Step 46263: {'lr': 6.9079798869571556e-06, 'samples': 23687168, 'steps': 46263, 'batch_loss/train': 0.6697679008357227} +12/29/2021 01:19:40 - INFO - codeparrot_training - Step 46264: {'lr': 6.904300418767606e-06, 'samples': 23687680, 'steps': 46264, 'batch_loss/train': 1.4872042655479163} +12/29/2021 01:19:51 - INFO - codeparrot_training - Step 46265: {'lr': 6.900621917034777e-06, 'samples': 23688192, 'steps': 46265, 'batch_loss/train': 0.6504272841848433} +12/29/2021 01:20:02 - INFO - codeparrot_training - Step 46266: {'lr': 6.896944381773268e-06, 'samples': 23688704, 'steps': 46266, 'batch_loss/train': 0.7300825815182179} +12/29/2021 01:20:14 - INFO - codeparrot_training - Step 46267: {'lr': 6.893267812997733e-06, 'samples': 23689216, 'steps': 46267, 'batch_loss/train': 0.712629213463515} +12/29/2021 01:20:24 - INFO - codeparrot_training - Step 46268: {'lr': 6.889592210722773e-06, 'samples': 23689728, 'steps': 46268, 'batch_loss/train': 0.5522850994602777} +12/29/2021 01:20:35 - INFO - codeparrot_training - Step 46269: {'lr': 6.885917574962986e-06, 'samples': 23690240, 'steps': 46269, 'batch_loss/train': 0.600861796643585} +12/29/2021 01:20:47 - INFO - codeparrot_training - Step 46270: {'lr': 6.882243905733027e-06, 'samples': 23690752, 'steps': 46270, 'batch_loss/train': 0.7981493826955557} +12/29/2021 01:20:58 - INFO - codeparrot_training - Step 46271: {'lr': 6.878571203047468e-06, 'samples': 23691264, 'steps': 46271, 'batch_loss/train': 0.7695967219769955} +12/29/2021 01:21:09 - INFO - codeparrot_training - Step 46272: {'lr': 6.874899466920881e-06, 'samples': 23691776, 'steps': 46272, 'batch_loss/train': 0.900913305580616} +12/29/2021 01:21:24 - INFO - codeparrot_training - Step 46273: {'lr': 6.871228697367865e-06, 'samples': 23692288, 'steps': 46273, 'batch_loss/train': 0.6639525394421071} +12/29/2021 01:21:34 - INFO - codeparrot_training - Step 46274: {'lr': 6.867558894403103e-06, 'samples': 23692800, 'steps': 46274, 'batch_loss/train': 0.7001116685569286} +12/29/2021 01:21:45 - INFO - codeparrot_training - Step 46275: {'lr': 6.863890058041083e-06, 'samples': 23693312, 'steps': 46275, 'batch_loss/train': 0.4255490618816111} +12/29/2021 01:21:56 - INFO - codeparrot_training - Step 46276: {'lr': 6.860222188296461e-06, 'samples': 23693824, 'steps': 46276, 'batch_loss/train': 0.8049748670309782} +12/29/2021 01:22:08 - INFO - codeparrot_training - Step 46277: {'lr': 6.85655528518378e-06, 'samples': 23694336, 'steps': 46277, 'batch_loss/train': 1.3927613566629589} +12/29/2021 01:22:18 - INFO - codeparrot_training - Step 46278: {'lr': 6.852889348717611e-06, 'samples': 23694848, 'steps': 46278, 'batch_loss/train': 0.8262589322403073} +12/29/2021 01:22:29 - INFO - codeparrot_training - Step 46279: {'lr': 6.849224378912555e-06, 'samples': 23695360, 'steps': 46279, 'batch_loss/train': 0.8514679349027574} +12/29/2021 01:22:40 - INFO - codeparrot_training - Step 46280: {'lr': 6.845560375783183e-06, 'samples': 23695872, 'steps': 46280, 'batch_loss/train': 0.7071405901806429} +12/29/2021 01:22:52 - INFO - codeparrot_training - Step 46281: {'lr': 6.841897339344039e-06, 'samples': 23696384, 'steps': 46281, 'batch_loss/train': 0.7115461900830269} +12/29/2021 01:23:02 - INFO - codeparrot_training - Step 46282: {'lr': 6.838235269609694e-06, 'samples': 23696896, 'steps': 46282, 'batch_loss/train': 0.8111610822379589} +12/29/2021 01:23:13 - INFO - codeparrot_training - Step 46283: {'lr': 6.834574166594748e-06, 'samples': 23697408, 'steps': 46283, 'batch_loss/train': 0.7474937913939357} +12/29/2021 01:23:27 - INFO - codeparrot_training - Step 46284: {'lr': 6.830914030313662e-06, 'samples': 23697920, 'steps': 46284, 'batch_loss/train': 0.7335960129275918} +12/29/2021 01:23:37 - INFO - codeparrot_training - Step 46285: {'lr': 6.827254860781063e-06, 'samples': 23698432, 'steps': 46285, 'batch_loss/train': 0.6572745512239635} +12/29/2021 01:23:48 - INFO - codeparrot_training - Step 46286: {'lr': 6.823596658011522e-06, 'samples': 23698944, 'steps': 46286, 'batch_loss/train': 0.6699600927531719} +12/29/2021 01:24:00 - INFO - codeparrot_training - Step 46287: {'lr': 6.8199394220195e-06, 'samples': 23699456, 'steps': 46287, 'batch_loss/train': 0.6372411490301602} +12/29/2021 01:24:11 - INFO - codeparrot_training - Step 46288: {'lr': 6.816283152819597e-06, 'samples': 23699968, 'steps': 46288, 'batch_loss/train': 0.7629147057887167} +12/29/2021 01:24:22 - INFO - codeparrot_training - Step 46289: {'lr': 6.8126278504263284e-06, 'samples': 23700480, 'steps': 46289, 'batch_loss/train': 0.7757801217958331} +12/29/2021 01:24:36 - INFO - codeparrot_training - Step 46290: {'lr': 6.8089735148542105e-06, 'samples': 23700992, 'steps': 46290, 'batch_loss/train': 0.7836120137944818} +12/29/2021 01:24:46 - INFO - codeparrot_training - Step 46291: {'lr': 6.805320146117788e-06, 'samples': 23701504, 'steps': 46291, 'batch_loss/train': 0.7707259664312005} +12/29/2021 01:24:57 - INFO - codeparrot_training - Step 46292: {'lr': 6.8016677442316046e-06, 'samples': 23702016, 'steps': 46292, 'batch_loss/train': 0.7547893323935568} +12/29/2021 01:25:08 - INFO - codeparrot_training - Step 46293: {'lr': 6.798016309210148e-06, 'samples': 23702528, 'steps': 46293, 'batch_loss/train': 0.5706824902445078} +12/29/2021 01:25:20 - INFO - codeparrot_training - Step 46294: {'lr': 6.794365841067934e-06, 'samples': 23703040, 'steps': 46294, 'batch_loss/train': 0.634113761363551} +12/29/2021 01:25:30 - INFO - codeparrot_training - Step 46295: {'lr': 6.790716339819508e-06, 'samples': 23703552, 'steps': 46295, 'batch_loss/train': 0.5440907886950299} +12/29/2021 01:25:41 - INFO - codeparrot_training - Step 46296: {'lr': 6.787067805479386e-06, 'samples': 23704064, 'steps': 46296, 'batch_loss/train': 0.8391356584616005} +12/29/2021 01:25:55 - INFO - codeparrot_training - Step 46297: {'lr': 6.783420238062027e-06, 'samples': 23704576, 'steps': 46297, 'batch_loss/train': 0.7647813918301836} +12/29/2021 01:26:06 - INFO - codeparrot_training - Step 46298: {'lr': 6.779773637581977e-06, 'samples': 23705088, 'steps': 46298, 'batch_loss/train': 0.7438942054286599} +12/29/2021 01:26:16 - INFO - codeparrot_training - Step 46299: {'lr': 6.776128004053639e-06, 'samples': 23705600, 'steps': 46299, 'batch_loss/train': 1.5012119645252824} +12/29/2021 01:26:29 - INFO - codeparrot_training - Step 46300: {'lr': 6.7724833374916696e-06, 'samples': 23706112, 'steps': 46300, 'batch_loss/train': 0.7234385469928384} +12/29/2021 01:26:39 - INFO - codeparrot_training - Step 46301: {'lr': 6.768839637910418e-06, 'samples': 23706624, 'steps': 46301, 'batch_loss/train': 0.7957325587049127} +12/29/2021 01:26:50 - INFO - codeparrot_training - Step 46302: {'lr': 6.765196905324428e-06, 'samples': 23707136, 'steps': 46302, 'batch_loss/train': 0.8175824042409658} +12/29/2021 01:27:01 - INFO - codeparrot_training - Step 46303: {'lr': 6.76155513974816e-06, 'samples': 23707648, 'steps': 46303, 'batch_loss/train': 0.4967504390515387} +12/29/2021 01:27:13 - INFO - codeparrot_training - Step 46304: {'lr': 6.757914341196103e-06, 'samples': 23708160, 'steps': 46304, 'batch_loss/train': 0.7757249316200614} +12/29/2021 01:27:23 - INFO - codeparrot_training - Step 46305: {'lr': 6.7542745096827454e-06, 'samples': 23708672, 'steps': 46305, 'batch_loss/train': 0.6509712147526443} +12/29/2021 01:27:34 - INFO - codeparrot_training - Step 46306: {'lr': 6.750635645222547e-06, 'samples': 23709184, 'steps': 46306, 'batch_loss/train': 0.7405125377699733} +12/29/2021 01:27:46 - INFO - codeparrot_training - Step 46307: {'lr': 6.746997747829997e-06, 'samples': 23709696, 'steps': 46307, 'batch_loss/train': 0.8073592251166701} +12/29/2021 01:27:57 - INFO - codeparrot_training - Step 46308: {'lr': 6.743360817519501e-06, 'samples': 23710208, 'steps': 46308, 'batch_loss/train': 0.7924033966846764} +12/29/2021 01:28:07 - INFO - codeparrot_training - Step 46309: {'lr': 6.739724854305574e-06, 'samples': 23710720, 'steps': 46309, 'batch_loss/train': 0.7312837494537234} +12/29/2021 01:28:20 - INFO - codeparrot_training - Step 46310: {'lr': 6.7360898582026205e-06, 'samples': 23711232, 'steps': 46310, 'batch_loss/train': 0.649235287681222} +12/29/2021 01:28:30 - INFO - codeparrot_training - Step 46311: {'lr': 6.732455829225131e-06, 'samples': 23711744, 'steps': 46311, 'batch_loss/train': 0.7372621762333438} +12/29/2021 01:28:41 - INFO - codeparrot_training - Step 46312: {'lr': 6.728822767387594e-06, 'samples': 23712256, 'steps': 46312, 'batch_loss/train': 0.7986095556989312} +12/29/2021 01:28:54 - INFO - codeparrot_training - Step 46313: {'lr': 6.725190672704357e-06, 'samples': 23712768, 'steps': 46313, 'batch_loss/train': 0.7626429637894034} +12/29/2021 01:29:05 - INFO - codeparrot_training - Step 46314: {'lr': 6.721559545189881e-06, 'samples': 23713280, 'steps': 46314, 'batch_loss/train': 0.7584756473079324} +12/29/2021 01:29:16 - INFO - codeparrot_training - Step 46315: {'lr': 6.717929384858684e-06, 'samples': 23713792, 'steps': 46315, 'batch_loss/train': 0.7809131587855518} +12/29/2021 01:29:26 - INFO - codeparrot_training - Step 46316: {'lr': 6.714300191725087e-06, 'samples': 23714304, 'steps': 46316, 'batch_loss/train': 0.7041197493090294} +12/29/2021 01:29:38 - INFO - codeparrot_training - Step 46317: {'lr': 6.710671965803605e-06, 'samples': 23714816, 'steps': 46317, 'batch_loss/train': 0.7453270321711898} +12/29/2021 01:29:49 - INFO - codeparrot_training - Step 46318: {'lr': 6.7070447071086176e-06, 'samples': 23715328, 'steps': 46318, 'batch_loss/train': 0.7665092488750815} +12/29/2021 01:30:00 - INFO - codeparrot_training - Step 46319: {'lr': 6.703418415654555e-06, 'samples': 23715840, 'steps': 46319, 'batch_loss/train': 0.7956178714521229} +12/29/2021 01:30:14 - INFO - codeparrot_training - Step 46320: {'lr': 6.699793091455825e-06, 'samples': 23716352, 'steps': 46320, 'batch_loss/train': 0.6499349007208366} +12/29/2021 01:30:24 - INFO - codeparrot_training - Step 46321: {'lr': 6.6961687345268585e-06, 'samples': 23716864, 'steps': 46321, 'batch_loss/train': 0.8171388609334826} +12/29/2021 01:30:35 - INFO - codeparrot_training - Step 46322: {'lr': 6.692545344882062e-06, 'samples': 23717376, 'steps': 46322, 'batch_loss/train': 0.7550914604216814} +12/29/2021 01:30:47 - INFO - codeparrot_training - Step 46323: {'lr': 6.688922922535839e-06, 'samples': 23717888, 'steps': 46323, 'batch_loss/train': 0.6758306189440191} +12/29/2021 01:30:58 - INFO - codeparrot_training - Step 46324: {'lr': 6.685301467502597e-06, 'samples': 23718400, 'steps': 46324, 'batch_loss/train': 0.6977033521980047} +12/29/2021 01:31:09 - INFO - codeparrot_training - Step 46325: {'lr': 6.681680979796684e-06, 'samples': 23718912, 'steps': 46325, 'batch_loss/train': 0.7336985701695085} +12/29/2021 01:31:21 - INFO - codeparrot_training - Step 46326: {'lr': 6.678061459432589e-06, 'samples': 23719424, 'steps': 46326, 'batch_loss/train': 0.6095302058383822} +12/29/2021 01:31:31 - INFO - codeparrot_training - Step 46327: {'lr': 6.6744429064246324e-06, 'samples': 23719936, 'steps': 46327, 'batch_loss/train': 0.7876489413902164} +12/29/2021 01:31:42 - INFO - codeparrot_training - Step 46328: {'lr': 6.6708253207871664e-06, 'samples': 23720448, 'steps': 46328, 'batch_loss/train': 0.7230468858033419} +12/29/2021 01:31:52 - INFO - codeparrot_training - Step 46329: {'lr': 6.667208702534678e-06, 'samples': 23720960, 'steps': 46329, 'batch_loss/train': 0.7176405619829893} +12/29/2021 01:32:06 - INFO - codeparrot_training - Step 46330: {'lr': 6.66359305168146e-06, 'samples': 23721472, 'steps': 46330, 'batch_loss/train': 0.8081347523257136} +12/29/2021 01:32:16 - INFO - codeparrot_training - Step 46331: {'lr': 6.65997836824192e-06, 'samples': 23721984, 'steps': 46331, 'batch_loss/train': 1.100878200493753} +12/29/2021 01:32:27 - INFO - codeparrot_training - Step 46332: {'lr': 6.656364652230434e-06, 'samples': 23722496, 'steps': 46332, 'batch_loss/train': 0.7858423050493002} +12/29/2021 01:32:39 - INFO - codeparrot_training - Step 46333: {'lr': 6.652751903661353e-06, 'samples': 23723008, 'steps': 46333, 'batch_loss/train': 0.7421044547809288} +12/29/2021 01:32:50 - INFO - codeparrot_training - Step 46334: {'lr': 6.649140122549052e-06, 'samples': 23723520, 'steps': 46334, 'batch_loss/train': 0.7245556600391865} +12/29/2021 01:33:01 - INFO - codeparrot_training - Step 46335: {'lr': 6.645529308907883e-06, 'samples': 23724032, 'steps': 46335, 'batch_loss/train': 0.6867295685224235} +12/29/2021 01:33:11 - INFO - codeparrot_training - Step 46336: {'lr': 6.6419194627521665e-06, 'samples': 23724544, 'steps': 46336, 'batch_loss/train': 0.7139207175932825} +12/29/2021 01:33:25 - INFO - codeparrot_training - Step 46337: {'lr': 6.6383105840963355e-06, 'samples': 23725056, 'steps': 46337, 'batch_loss/train': 1.008720960933715} +12/29/2021 01:33:36 - INFO - codeparrot_training - Step 46338: {'lr': 6.634702672954685e-06, 'samples': 23725568, 'steps': 46338, 'batch_loss/train': 0.9933733409270644} +12/29/2021 01:33:46 - INFO - codeparrot_training - Step 46339: {'lr': 6.631095729341563e-06, 'samples': 23726080, 'steps': 46339, 'batch_loss/train': 0.6923622705508024} +12/29/2021 01:33:58 - INFO - codeparrot_training - Step 46340: {'lr': 6.627489753271293e-06, 'samples': 23726592, 'steps': 46340, 'batch_loss/train': 0.6768550763372332} +12/29/2021 01:34:09 - INFO - codeparrot_training - Step 46341: {'lr': 6.623884744758252e-06, 'samples': 23727104, 'steps': 46341, 'batch_loss/train': 0.7672543693333864} +12/29/2021 01:34:20 - INFO - codeparrot_training - Step 46342: {'lr': 6.620280703816734e-06, 'samples': 23727616, 'steps': 46342, 'batch_loss/train': 0.8109114640392363} +12/29/2021 01:34:30 - INFO - codeparrot_training - Step 46343: {'lr': 6.6166776304610875e-06, 'samples': 23728128, 'steps': 46343, 'batch_loss/train': 0.7410417150240391} +12/29/2021 01:34:42 - INFO - codeparrot_training - Step 46344: {'lr': 6.613075524705636e-06, 'samples': 23728640, 'steps': 46344, 'batch_loss/train': 0.5824672980234027} +12/29/2021 01:34:53 - INFO - codeparrot_training - Step 46345: {'lr': 6.609474386564701e-06, 'samples': 23729152, 'steps': 46345, 'batch_loss/train': 0.6499365079216659} +12/29/2021 01:35:04 - INFO - codeparrot_training - Step 46346: {'lr': 6.605874216052604e-06, 'samples': 23729664, 'steps': 46346, 'batch_loss/train': 0.6780941494507715} +12/29/2021 01:35:16 - INFO - codeparrot_training - Step 46347: {'lr': 6.6022750131836115e-06, 'samples': 23730176, 'steps': 46347, 'batch_loss/train': 0.6803511157631874} +12/29/2021 01:35:26 - INFO - codeparrot_training - Step 46348: {'lr': 6.5986767779721016e-06, 'samples': 23730688, 'steps': 46348, 'batch_loss/train': 0.7352011166512966} +12/29/2021 01:35:37 - INFO - codeparrot_training - Step 46349: {'lr': 6.595079510432339e-06, 'samples': 23731200, 'steps': 46349, 'batch_loss/train': 0.7249002708122134} +12/29/2021 01:35:50 - INFO - codeparrot_training - Step 46350: {'lr': 6.5914832105786745e-06, 'samples': 23731712, 'steps': 46350, 'batch_loss/train': 0.7494561118073761} +12/29/2021 01:36:00 - INFO - codeparrot_training - Step 46351: {'lr': 6.587887878425291e-06, 'samples': 23732224, 'steps': 46351, 'batch_loss/train': 0.6769770204555243} +12/29/2021 01:36:11 - INFO - codeparrot_training - Step 46352: {'lr': 6.584293513986623e-06, 'samples': 23732736, 'steps': 46352, 'batch_loss/train': 0.6718658544123173} +12/29/2021 01:36:23 - INFO - codeparrot_training - Step 46353: {'lr': 6.58070011727685e-06, 'samples': 23733248, 'steps': 46353, 'batch_loss/train': 0.6656153229996562} +12/29/2021 01:36:34 - INFO - codeparrot_training - Step 46354: {'lr': 6.577107688310296e-06, 'samples': 23733760, 'steps': 46354, 'batch_loss/train': 0.6898913233308122} +12/29/2021 01:36:44 - INFO - codeparrot_training - Step 46355: {'lr': 6.5735162271012835e-06, 'samples': 23734272, 'steps': 46355, 'batch_loss/train': 0.8442181749269366} +12/29/2021 01:36:55 - INFO - codeparrot_training - Step 46356: {'lr': 6.569925733664051e-06, 'samples': 23734784, 'steps': 46356, 'batch_loss/train': 0.719674464315176} +12/29/2021 01:37:07 - INFO - codeparrot_training - Step 46357: {'lr': 6.566336208012863e-06, 'samples': 23735296, 'steps': 46357, 'batch_loss/train': 0.6542035307502374} +12/29/2021 01:37:18 - INFO - codeparrot_training - Step 46358: {'lr': 6.562747650161988e-06, 'samples': 23735808, 'steps': 46358, 'batch_loss/train': 0.7905286625027657} +12/29/2021 01:37:28 - INFO - codeparrot_training - Step 46359: {'lr': 6.5591600601257465e-06, 'samples': 23736320, 'steps': 46359, 'batch_loss/train': 0.5966965243860614} +12/29/2021 01:37:41 - INFO - codeparrot_training - Step 46360: {'lr': 6.555573437918349e-06, 'samples': 23736832, 'steps': 46360, 'batch_loss/train': 0.7781512270448729} +12/29/2021 01:37:52 - INFO - codeparrot_training - Step 46361: {'lr': 6.551987783554092e-06, 'samples': 23737344, 'steps': 46361, 'batch_loss/train': 0.6806710120290518} +12/29/2021 01:38:03 - INFO - codeparrot_training - Step 46362: {'lr': 6.548403097047156e-06, 'samples': 23737856, 'steps': 46362, 'batch_loss/train': 0.6923778709024191} +12/29/2021 01:38:15 - INFO - codeparrot_training - Step 46363: {'lr': 6.544819378411893e-06, 'samples': 23738368, 'steps': 46363, 'batch_loss/train': 0.7078305138275027} +12/29/2021 01:38:25 - INFO - codeparrot_training - Step 46364: {'lr': 6.541236627662511e-06, 'samples': 23738880, 'steps': 46364, 'batch_loss/train': 0.7231001020409167} +12/29/2021 01:38:36 - INFO - codeparrot_training - Step 46365: {'lr': 6.537654844813196e-06, 'samples': 23739392, 'steps': 46365, 'batch_loss/train': 0.7875720467418432} +12/29/2021 01:38:47 - INFO - codeparrot_training - Step 46366: {'lr': 6.53407402987824e-06, 'samples': 23739904, 'steps': 46366, 'batch_loss/train': 0.7644362915307283} +12/29/2021 01:38:59 - INFO - codeparrot_training - Step 46367: {'lr': 6.530494182871938e-06, 'samples': 23740416, 'steps': 46367, 'batch_loss/train': 0.7559782052412629} +12/29/2021 01:39:10 - INFO - codeparrot_training - Step 46368: {'lr': 6.526915303808417e-06, 'samples': 23740928, 'steps': 46368, 'batch_loss/train': 0.711293550208211} +12/29/2021 01:39:21 - INFO - codeparrot_training - Step 46369: {'lr': 6.523337392701945e-06, 'samples': 23741440, 'steps': 46369, 'batch_loss/train': 0.7268172092735767} +12/29/2021 01:39:33 - INFO - codeparrot_training - Step 46370: {'lr': 6.519760449566758e-06, 'samples': 23741952, 'steps': 46370, 'batch_loss/train': 0.7367680789902806} +12/29/2021 01:39:43 - INFO - codeparrot_training - Step 46371: {'lr': 6.5161844744170695e-06, 'samples': 23742464, 'steps': 46371, 'batch_loss/train': 0.7374270306900144} +12/29/2021 01:39:54 - INFO - codeparrot_training - Step 46372: {'lr': 6.512609467267117e-06, 'samples': 23742976, 'steps': 46372, 'batch_loss/train': 0.8619362744502723} +12/29/2021 01:40:06 - INFO - codeparrot_training - Step 46373: {'lr': 6.509035428131055e-06, 'samples': 23743488, 'steps': 46373, 'batch_loss/train': 0.6868656971491873} +12/29/2021 01:40:17 - INFO - codeparrot_training - Step 46374: {'lr': 6.505462357023151e-06, 'samples': 23744000, 'steps': 46374, 'batch_loss/train': 0.7627067128196359} +12/29/2021 01:40:28 - INFO - codeparrot_training - Step 46375: {'lr': 6.501890253957587e-06, 'samples': 23744512, 'steps': 46375, 'batch_loss/train': 0.698751755291596} +12/29/2021 01:40:40 - INFO - codeparrot_training - Step 46376: {'lr': 6.4983191189485755e-06, 'samples': 23745024, 'steps': 46376, 'batch_loss/train': 0.7070905710570514} +12/29/2021 01:40:51 - INFO - codeparrot_training - Step 46377: {'lr': 6.494748952010271e-06, 'samples': 23745536, 'steps': 46377, 'batch_loss/train': 0.6797575056552887} +12/29/2021 01:41:02 - INFO - codeparrot_training - Step 46378: {'lr': 6.491179753156967e-06, 'samples': 23746048, 'steps': 46378, 'batch_loss/train': 0.746761177200824} +12/29/2021 01:41:12 - INFO - codeparrot_training - Step 46379: {'lr': 6.487611522402737e-06, 'samples': 23746560, 'steps': 46379, 'batch_loss/train': 0.692876560613513} +12/29/2021 01:41:24 - INFO - codeparrot_training - Step 46380: {'lr': 6.48404425976179e-06, 'samples': 23747072, 'steps': 46380, 'batch_loss/train': 0.8897634600289166} +12/29/2021 01:41:35 - INFO - codeparrot_training - Step 46381: {'lr': 6.480477965248394e-06, 'samples': 23747584, 'steps': 46381, 'batch_loss/train': 0.7385101513937116} +12/29/2021 01:41:46 - INFO - codeparrot_training - Step 46382: {'lr': 6.476912638876648e-06, 'samples': 23748096, 'steps': 46382, 'batch_loss/train': 0.7138758156797849} +12/29/2021 01:41:58 - INFO - codeparrot_training - Step 46383: {'lr': 6.473348280660735e-06, 'samples': 23748608, 'steps': 46383, 'batch_loss/train': 0.7116339770145714} +12/29/2021 01:42:09 - INFO - codeparrot_training - Step 46384: {'lr': 6.469784890614838e-06, 'samples': 23749120, 'steps': 46384, 'batch_loss/train': 0.745890696067363} +12/29/2021 01:42:19 - INFO - codeparrot_training - Step 46385: {'lr': 6.466222468753114e-06, 'samples': 23749632, 'steps': 46385, 'batch_loss/train': 0.6964520723558962} +12/29/2021 01:42:31 - INFO - codeparrot_training - Step 46386: {'lr': 6.462661015089744e-06, 'samples': 23750144, 'steps': 46386, 'batch_loss/train': 0.6807820507092401} +12/29/2021 01:42:42 - INFO - codeparrot_training - Step 46387: {'lr': 6.459100529638911e-06, 'samples': 23750656, 'steps': 46387, 'batch_loss/train': 0.841633529169485} +12/29/2021 01:42:53 - INFO - codeparrot_training - Step 46388: {'lr': 6.45554101241469e-06, 'samples': 23751168, 'steps': 46388, 'batch_loss/train': 0.7078904000809416} +12/29/2021 01:43:03 - INFO - codeparrot_training - Step 46389: {'lr': 6.451982463431289e-06, 'samples': 23751680, 'steps': 46389, 'batch_loss/train': 0.7774503817781806} +12/29/2021 01:43:16 - INFO - codeparrot_training - Step 46390: {'lr': 6.448424882702864e-06, 'samples': 23752192, 'steps': 46390, 'batch_loss/train': 0.7463746268767864} +12/29/2021 01:43:26 - INFO - codeparrot_training - Step 46391: {'lr': 6.4448682702435146e-06, 'samples': 23752704, 'steps': 46391, 'batch_loss/train': 0.7617997350171208} +12/29/2021 01:43:37 - INFO - codeparrot_training - Step 46392: {'lr': 6.441312626067425e-06, 'samples': 23753216, 'steps': 46392, 'batch_loss/train': 0.7201383241917938} +12/29/2021 01:43:49 - INFO - codeparrot_training - Step 46393: {'lr': 6.437757950188722e-06, 'samples': 23753728, 'steps': 46393, 'batch_loss/train': 0.6825038278475404} +12/29/2021 01:44:00 - INFO - codeparrot_training - Step 46394: {'lr': 6.4342042426215054e-06, 'samples': 23754240, 'steps': 46394, 'batch_loss/train': 0.7910317284986377} +12/29/2021 01:44:11 - INFO - codeparrot_training - Step 46395: {'lr': 6.430651503379931e-06, 'samples': 23754752, 'steps': 46395, 'batch_loss/train': 0.689269165857695} +12/29/2021 01:44:23 - INFO - codeparrot_training - Step 46396: {'lr': 6.427099732478126e-06, 'samples': 23755264, 'steps': 46396, 'batch_loss/train': 0.7834515022113919} +12/29/2021 01:44:33 - INFO - codeparrot_training - Step 46397: {'lr': 6.42354892993019e-06, 'samples': 23755776, 'steps': 46397, 'batch_loss/train': 0.6930960933677852} +12/29/2021 01:44:44 - INFO - codeparrot_training - Step 46398: {'lr': 6.419999095750278e-06, 'samples': 23756288, 'steps': 46398, 'batch_loss/train': 0.7360873953439295} +12/29/2021 01:44:54 - INFO - codeparrot_training - Step 46399: {'lr': 6.416450229952464e-06, 'samples': 23756800, 'steps': 46399, 'batch_loss/train': 0.7392800664529204} +12/29/2021 01:45:07 - INFO - codeparrot_training - Step 46400: {'lr': 6.412902332550846e-06, 'samples': 23757312, 'steps': 46400, 'batch_loss/train': 0.8809290342032909} +12/29/2021 01:45:18 - INFO - codeparrot_training - Step 46401: {'lr': 6.4093554035595806e-06, 'samples': 23757824, 'steps': 46401, 'batch_loss/train': 0.7390345067251474} +12/29/2021 01:45:29 - INFO - codeparrot_training - Step 46402: {'lr': 6.405809442992739e-06, 'samples': 23758336, 'steps': 46402, 'batch_loss/train': 0.8515718767885119} +12/29/2021 01:45:41 - INFO - codeparrot_training - Step 46403: {'lr': 6.40226445086442e-06, 'samples': 23758848, 'steps': 46403, 'batch_loss/train': 0.7197595646139234} +12/29/2021 01:45:51 - INFO - codeparrot_training - Step 46404: {'lr': 6.398720427188698e-06, 'samples': 23759360, 'steps': 46404, 'batch_loss/train': 0.789801275357604} +12/29/2021 01:46:02 - INFO - codeparrot_training - Step 46405: {'lr': 6.395177371979727e-06, 'samples': 23759872, 'steps': 46405, 'batch_loss/train': 0.8187500997446477} +12/29/2021 01:46:15 - INFO - codeparrot_training - Step 46406: {'lr': 6.391635285251496e-06, 'samples': 23760384, 'steps': 46406, 'batch_loss/train': 0.7736975830048323} +12/29/2021 01:46:25 - INFO - codeparrot_training - Step 46407: {'lr': 6.388094167018188e-06, 'samples': 23760896, 'steps': 46407, 'batch_loss/train': 0.7934151571244001} +12/29/2021 01:46:36 - INFO - codeparrot_training - Step 46408: {'lr': 6.38455401729382e-06, 'samples': 23761408, 'steps': 46408, 'batch_loss/train': 0.7181200970662758} +12/29/2021 01:46:48 - INFO - codeparrot_training - Step 46409: {'lr': 6.381014836092463e-06, 'samples': 23761920, 'steps': 46409, 'batch_loss/train': 0.7478167889639735} +12/29/2021 01:46:59 - INFO - codeparrot_training - Step 46410: {'lr': 6.377476623428219e-06, 'samples': 23762432, 'steps': 46410, 'batch_loss/train': 0.7950517609715462} +12/29/2021 01:47:09 - INFO - codeparrot_training - Step 46411: {'lr': 6.37393937931513e-06, 'samples': 23762944, 'steps': 46411, 'batch_loss/train': 0.6500310113187879} +12/29/2021 01:47:20 - INFO - codeparrot_training - Step 46412: {'lr': 6.3704031037672695e-06, 'samples': 23763456, 'steps': 46412, 'batch_loss/train': 0.7586730099283159} +12/29/2021 01:47:32 - INFO - codeparrot_training - Step 46413: {'lr': 6.3668677967987085e-06, 'samples': 23763968, 'steps': 46413, 'batch_loss/train': 0.751184223452583} +12/29/2021 01:47:43 - INFO - codeparrot_training - Step 46414: {'lr': 6.3633334584234645e-06, 'samples': 23764480, 'steps': 46414, 'batch_loss/train': 0.7573749315924942} +12/29/2021 01:47:54 - INFO - codeparrot_training - Step 46415: {'lr': 6.359800088655637e-06, 'samples': 23764992, 'steps': 46415, 'batch_loss/train': 0.6161970216780901} +12/29/2021 01:48:06 - INFO - codeparrot_training - Step 46416: {'lr': 6.356267687509271e-06, 'samples': 23765504, 'steps': 46416, 'batch_loss/train': 0.8631813812535256} +12/29/2021 01:48:17 - INFO - codeparrot_training - Step 46417: {'lr': 6.3527362549983255e-06, 'samples': 23766016, 'steps': 46417, 'batch_loss/train': 0.7963525473605841} +12/29/2021 01:48:28 - INFO - codeparrot_training - Step 46418: {'lr': 6.34920579113693e-06, 'samples': 23766528, 'steps': 46418, 'batch_loss/train': 0.6296023488976061} +12/29/2021 01:48:40 - INFO - codeparrot_training - Step 46419: {'lr': 6.345676295939129e-06, 'samples': 23767040, 'steps': 46419, 'batch_loss/train': 0.751110983081162} +12/29/2021 01:48:51 - INFO - codeparrot_training - Step 46420: {'lr': 6.342147769418882e-06, 'samples': 23767552, 'steps': 46420, 'batch_loss/train': 0.7291943242307752} +12/29/2021 01:49:01 - INFO - codeparrot_training - Step 46421: {'lr': 6.338620211590235e-06, 'samples': 23768064, 'steps': 46421, 'batch_loss/train': 0.8230069817509502} +12/29/2021 01:49:12 - INFO - codeparrot_training - Step 46422: {'lr': 6.335093622467259e-06, 'samples': 23768576, 'steps': 46422, 'batch_loss/train': 0.7420104295597412} +12/29/2021 01:49:24 - INFO - codeparrot_training - Step 46423: {'lr': 6.331568002063942e-06, 'samples': 23769088, 'steps': 46423, 'batch_loss/train': 0.7246180921792984} +12/29/2021 01:49:35 - INFO - codeparrot_training - Step 46424: {'lr': 6.3280433503943025e-06, 'samples': 23769600, 'steps': 46424, 'batch_loss/train': 0.600519105559215} +12/29/2021 01:49:46 - INFO - codeparrot_training - Step 46425: {'lr': 6.324519667472384e-06, 'samples': 23770112, 'steps': 46425, 'batch_loss/train': 0.7526837214827538} +12/29/2021 01:49:58 - INFO - codeparrot_training - Step 46426: {'lr': 6.320996953312119e-06, 'samples': 23770624, 'steps': 46426, 'batch_loss/train': 0.7434886915143579} +12/29/2021 01:50:09 - INFO - codeparrot_training - Step 46427: {'lr': 6.317475207927581e-06, 'samples': 23771136, 'steps': 46427, 'batch_loss/train': 0.781601800583303} +12/29/2021 01:50:20 - INFO - codeparrot_training - Step 46428: {'lr': 6.313954431332758e-06, 'samples': 23771648, 'steps': 46428, 'batch_loss/train': 0.7699305498972535} +12/29/2021 01:50:32 - INFO - codeparrot_training - Step 46429: {'lr': 6.310434623541639e-06, 'samples': 23772160, 'steps': 46429, 'batch_loss/train': 0.9751603100448847} +12/29/2021 01:50:43 - INFO - codeparrot_training - Step 46430: {'lr': 6.306915784568212e-06, 'samples': 23772672, 'steps': 46430, 'batch_loss/train': 0.687387265264988} +12/29/2021 01:50:54 - INFO - codeparrot_training - Step 46431: {'lr': 6.303397914426495e-06, 'samples': 23773184, 'steps': 46431, 'batch_loss/train': 0.7126328311860561} +12/29/2021 01:51:04 - INFO - codeparrot_training - Step 46432: {'lr': 6.299881013130421e-06, 'samples': 23773696, 'steps': 46432, 'batch_loss/train': 0.7399997990578413} +12/29/2021 01:51:16 - INFO - codeparrot_training - Step 46433: {'lr': 6.296365080694033e-06, 'samples': 23774208, 'steps': 46433, 'batch_loss/train': 0.7924150517210364} +12/29/2021 01:51:27 - INFO - codeparrot_training - Step 46434: {'lr': 6.292850117131238e-06, 'samples': 23774720, 'steps': 46434, 'batch_loss/train': 0.8769879890605807} +12/29/2021 01:51:38 - INFO - codeparrot_training - Step 46435: {'lr': 6.289336122456079e-06, 'samples': 23775232, 'steps': 46435, 'batch_loss/train': 0.771605129702948} +12/29/2021 01:51:50 - INFO - codeparrot_training - Step 46436: {'lr': 6.285823096682492e-06, 'samples': 23775744, 'steps': 46436, 'batch_loss/train': 0.8264692923985422} +12/29/2021 01:52:01 - INFO - codeparrot_training - Step 46437: {'lr': 6.282311039824462e-06, 'samples': 23776256, 'steps': 46437, 'batch_loss/train': 0.8125214921310544} +12/29/2021 01:52:12 - INFO - codeparrot_training - Step 46438: {'lr': 6.278799951895897e-06, 'samples': 23776768, 'steps': 46438, 'batch_loss/train': 0.7279928792268038} +12/29/2021 01:52:24 - INFO - codeparrot_training - Step 46439: {'lr': 6.275289832910841e-06, 'samples': 23777280, 'steps': 46439, 'batch_loss/train': 0.7321909796446562} +12/29/2021 01:52:35 - INFO - codeparrot_training - Step 46440: {'lr': 6.271780682883199e-06, 'samples': 23777792, 'steps': 46440, 'batch_loss/train': 0.6781269208877347} +12/29/2021 01:52:45 - INFO - codeparrot_training - Step 46441: {'lr': 6.268272501826905e-06, 'samples': 23778304, 'steps': 46441, 'batch_loss/train': 0.7829091819003224} +12/29/2021 01:52:56 - INFO - codeparrot_training - Step 46442: {'lr': 6.264765289755975e-06, 'samples': 23778816, 'steps': 46442, 'batch_loss/train': 0.7697457297472283} +12/29/2021 01:53:08 - INFO - codeparrot_training - Step 46443: {'lr': 6.261259046684259e-06, 'samples': 23779328, 'steps': 46443, 'batch_loss/train': 0.7417254492174834} +12/29/2021 01:53:18 - INFO - codeparrot_training - Step 46444: {'lr': 6.2577537726257465e-06, 'samples': 23779840, 'steps': 46444, 'batch_loss/train': 0.6307087338063866} +12/29/2021 01:53:29 - INFO - codeparrot_training - Step 46445: {'lr': 6.254249467594425e-06, 'samples': 23780352, 'steps': 46445, 'batch_loss/train': 0.8291733507066965} +12/29/2021 01:53:42 - INFO - codeparrot_training - Step 46446: {'lr': 6.250746131604118e-06, 'samples': 23780864, 'steps': 46446, 'batch_loss/train': 0.7342126802541316} +12/29/2021 01:53:53 - INFO - codeparrot_training - Step 46447: {'lr': 6.247243764668814e-06, 'samples': 23781376, 'steps': 46447, 'batch_loss/train': 0.4937426178948954} +12/29/2021 01:54:04 - INFO - codeparrot_training - Step 46448: {'lr': 6.243742366802474e-06, 'samples': 23781888, 'steps': 46448, 'batch_loss/train': 0.6934433486312628} +12/29/2021 01:54:16 - INFO - codeparrot_training - Step 46449: {'lr': 6.2402419380189476e-06, 'samples': 23782400, 'steps': 46449, 'batch_loss/train': 1.3155236952006817} +12/29/2021 01:54:26 - INFO - codeparrot_training - Step 46450: {'lr': 6.236742478332169e-06, 'samples': 23782912, 'steps': 46450, 'batch_loss/train': 0.7212679823860526} +12/29/2021 01:54:37 - INFO - codeparrot_training - Step 46451: {'lr': 6.233243987756071e-06, 'samples': 23783424, 'steps': 46451, 'batch_loss/train': 0.7001624200493097} +12/29/2021 01:54:48 - INFO - codeparrot_training - Step 46452: {'lr': 6.229746466304531e-06, 'samples': 23783936, 'steps': 46452, 'batch_loss/train': 0.6948142568580806} +12/29/2021 01:55:00 - INFO - codeparrot_training - Step 46453: {'lr': 6.22624991399151e-06, 'samples': 23784448, 'steps': 46453, 'batch_loss/train': 0.6296020117588341} +12/29/2021 01:55:10 - INFO - codeparrot_training - Step 46454: {'lr': 6.22275433083086e-06, 'samples': 23784960, 'steps': 46454, 'batch_loss/train': 0.7369946800172329} +12/29/2021 01:55:21 - INFO - codeparrot_training - Step 46455: {'lr': 6.219259716836484e-06, 'samples': 23785472, 'steps': 46455, 'batch_loss/train': 0.6473789773881435} +12/29/2021 01:55:34 - INFO - codeparrot_training - Step 46456: {'lr': 6.215766072022288e-06, 'samples': 23785984, 'steps': 46456, 'batch_loss/train': 0.8186690732836723} +12/29/2021 01:55:44 - INFO - codeparrot_training - Step 46457: {'lr': 6.212273396402179e-06, 'samples': 23786496, 'steps': 46457, 'batch_loss/train': 0.7431454351171851} +12/29/2021 01:55:55 - INFO - codeparrot_training - Step 46458: {'lr': 6.208781689989979e-06, 'samples': 23787008, 'steps': 46458, 'batch_loss/train': 0.6490856271702796} +12/29/2021 01:56:07 - INFO - codeparrot_training - Step 46459: {'lr': 6.205290952799619e-06, 'samples': 23787520, 'steps': 46459, 'batch_loss/train': 0.759344800375402} +12/29/2021 01:56:18 - INFO - codeparrot_training - Step 46460: {'lr': 6.201801184845007e-06, 'samples': 23788032, 'steps': 46460, 'batch_loss/train': 0.7218066564528272} +12/29/2021 01:56:28 - INFO - codeparrot_training - Step 46461: {'lr': 6.198312386139965e-06, 'samples': 23788544, 'steps': 46461, 'batch_loss/train': 0.7368890689685941} +12/29/2021 01:56:41 - INFO - codeparrot_training - Step 46462: {'lr': 6.1948245566983415e-06, 'samples': 23789056, 'steps': 46462, 'batch_loss/train': 0.7741915481165051} +12/29/2021 01:56:52 - INFO - codeparrot_training - Step 46463: {'lr': 6.191337696534072e-06, 'samples': 23789568, 'steps': 46463, 'batch_loss/train': 0.8471130318939686} +12/29/2021 01:57:03 - INFO - codeparrot_training - Step 46464: {'lr': 6.187851805660977e-06, 'samples': 23790080, 'steps': 46464, 'batch_loss/train': 0.745207377942279} +12/29/2021 01:57:13 - INFO - codeparrot_training - Step 46465: {'lr': 6.184366884092907e-06, 'samples': 23790592, 'steps': 46465, 'batch_loss/train': 0.7226653029210865} +12/29/2021 01:57:26 - INFO - codeparrot_training - Step 46466: {'lr': 6.180882931843767e-06, 'samples': 23791104, 'steps': 46466, 'batch_loss/train': 0.7609192449599504} +12/29/2021 01:57:36 - INFO - codeparrot_training - Step 46467: {'lr': 6.1773999489273534e-06, 'samples': 23791616, 'steps': 46467, 'batch_loss/train': 0.5952908378094435} +12/29/2021 01:57:47 - INFO - codeparrot_training - Step 46468: {'lr': 6.173917935357543e-06, 'samples': 23792128, 'steps': 46468, 'batch_loss/train': 0.6957054417580366} +12/29/2021 01:57:59 - INFO - codeparrot_training - Step 46469: {'lr': 6.170436891148129e-06, 'samples': 23792640, 'steps': 46469, 'batch_loss/train': 0.7814653888344765} +12/29/2021 01:58:10 - INFO - codeparrot_training - Step 46470: {'lr': 6.166956816313046e-06, 'samples': 23793152, 'steps': 46470, 'batch_loss/train': 0.6921428088098764} +12/29/2021 01:58:21 - INFO - codeparrot_training - Step 46471: {'lr': 6.163477710866061e-06, 'samples': 23793664, 'steps': 46471, 'batch_loss/train': 0.6978792957961559} +12/29/2021 01:58:33 - INFO - codeparrot_training - Step 46472: {'lr': 6.159999574821024e-06, 'samples': 23794176, 'steps': 46472, 'batch_loss/train': 0.5723495418787934} +12/29/2021 01:58:43 - INFO - codeparrot_training - Step 46473: {'lr': 6.156522408191701e-06, 'samples': 23794688, 'steps': 46473, 'batch_loss/train': 0.732360870577395} +12/29/2021 01:58:54 - INFO - codeparrot_training - Step 46474: {'lr': 6.1530462109920525e-06, 'samples': 23795200, 'steps': 46474, 'batch_loss/train': 1.4842447601258755} +12/29/2021 01:59:05 - INFO - codeparrot_training - Step 46475: {'lr': 6.149570983235792e-06, 'samples': 23795712, 'steps': 46475, 'batch_loss/train': 0.7000930937938392} +12/29/2021 01:59:19 - INFO - codeparrot_training - Step 46476: {'lr': 6.146096724936712e-06, 'samples': 23796224, 'steps': 46476, 'batch_loss/train': 0.6968480418436229} +12/29/2021 01:59:30 - INFO - codeparrot_training - Step 46477: {'lr': 6.1426234361087465e-06, 'samples': 23796736, 'steps': 46477, 'batch_loss/train': 0.8055871017277241} +12/29/2021 01:59:40 - INFO - codeparrot_training - Step 46478: {'lr': 6.1391511167656065e-06, 'samples': 23797248, 'steps': 46478, 'batch_loss/train': 0.8214521887712181} +12/29/2021 01:59:51 - INFO - codeparrot_training - Step 46479: {'lr': 6.135679766921115e-06, 'samples': 23797760, 'steps': 46479, 'batch_loss/train': 1.3545818668790162} +12/29/2021 02:00:03 - INFO - codeparrot_training - Step 46480: {'lr': 6.132209386589094e-06, 'samples': 23798272, 'steps': 46480, 'batch_loss/train': 0.8609001273289323} +12/29/2021 02:00:14 - INFO - codeparrot_training - Step 46481: {'lr': 6.128739975783337e-06, 'samples': 23798784, 'steps': 46481, 'batch_loss/train': 0.7681252444162965} +12/29/2021 02:00:24 - INFO - codeparrot_training - Step 46482: {'lr': 6.125271534517612e-06, 'samples': 23799296, 'steps': 46482, 'batch_loss/train': 0.7975415172986686} +12/29/2021 02:00:36 - INFO - codeparrot_training - Step 46483: {'lr': 6.121804062805741e-06, 'samples': 23799808, 'steps': 46483, 'batch_loss/train': 0.7180179292336106} +12/29/2021 02:00:47 - INFO - codeparrot_training - Step 46484: {'lr': 6.1183375606614635e-06, 'samples': 23800320, 'steps': 46484, 'batch_loss/train': 0.7703862437047064} +12/29/2021 02:00:58 - INFO - codeparrot_training - Step 46485: {'lr': 6.1148720280986e-06, 'samples': 23800832, 'steps': 46485, 'batch_loss/train': 0.7604290153831244} +12/29/2021 02:01:11 - INFO - codeparrot_training - Step 46486: {'lr': 6.111407465130947e-06, 'samples': 23801344, 'steps': 46486, 'batch_loss/train': 0.6974516985937953} +12/29/2021 02:01:22 - INFO - codeparrot_training - Step 46487: {'lr': 6.1079438717722145e-06, 'samples': 23801856, 'steps': 46487, 'batch_loss/train': 0.788780327886343} +12/29/2021 02:01:32 - INFO - codeparrot_training - Step 46488: {'lr': 6.1044812480361974e-06, 'samples': 23802368, 'steps': 46488, 'batch_loss/train': 0.5716908075846732} +12/29/2021 02:01:43 - INFO - codeparrot_training - Step 46489: {'lr': 6.1010195939366905e-06, 'samples': 23802880, 'steps': 46489, 'batch_loss/train': 0.7438450357876718} +12/29/2021 02:01:55 - INFO - codeparrot_training - Step 46490: {'lr': 6.0975589094874325e-06, 'samples': 23803392, 'steps': 46490, 'batch_loss/train': 0.705945584923029} +12/29/2021 02:02:06 - INFO - codeparrot_training - Step 46491: {'lr': 6.09409919470219e-06, 'samples': 23803904, 'steps': 46491, 'batch_loss/train': 0.8353746677748859} +12/29/2021 02:02:16 - INFO - codeparrot_training - Step 46492: {'lr': 6.090640449594731e-06, 'samples': 23804416, 'steps': 46492, 'batch_loss/train': 0.8099678419530392} +12/29/2021 02:02:29 - INFO - codeparrot_training - Step 46493: {'lr': 6.087182674178765e-06, 'samples': 23804928, 'steps': 46493, 'batch_loss/train': 0.7870851410552859} +12/29/2021 02:02:40 - INFO - codeparrot_training - Step 46494: {'lr': 6.0837258684680595e-06, 'samples': 23805440, 'steps': 46494, 'batch_loss/train': 0.7932814084924757} +12/29/2021 02:02:50 - INFO - codeparrot_training - Step 46495: {'lr': 6.080270032476381e-06, 'samples': 23805952, 'steps': 46495, 'batch_loss/train': 0.7178882961161435} +12/29/2021 02:03:03 - INFO - codeparrot_training - Step 46496: {'lr': 6.076815166217442e-06, 'samples': 23806464, 'steps': 46496, 'batch_loss/train': 0.8195964267943054} +12/29/2021 02:03:13 - INFO - codeparrot_training - Step 46497: {'lr': 6.07336126970498e-06, 'samples': 23806976, 'steps': 46497, 'batch_loss/train': 0.7397699411958456} +12/29/2021 02:03:24 - INFO - codeparrot_training - Step 46498: {'lr': 6.069908342952734e-06, 'samples': 23807488, 'steps': 46498, 'batch_loss/train': 0.8704884201288223} +12/29/2021 02:03:36 - INFO - codeparrot_training - Step 46499: {'lr': 6.066456385974417e-06, 'samples': 23808000, 'steps': 46499, 'batch_loss/train': 0.7309365822002292} +12/29/2021 02:03:47 - INFO - codeparrot_training - Step 46500: {'lr': 6.063005398783794e-06, 'samples': 23808512, 'steps': 46500, 'batch_loss/train': 0.7682087626308203} +12/29/2021 02:03:57 - INFO - codeparrot_training - Step 46501: {'lr': 6.059555381394521e-06, 'samples': 23809024, 'steps': 46501, 'batch_loss/train': 0.6988576692529023} +12/29/2021 02:04:08 - INFO - codeparrot_training - Step 46502: {'lr': 6.056106333820338e-06, 'samples': 23809536, 'steps': 46502, 'batch_loss/train': 0.6340116629144177} +12/29/2021 02:04:21 - INFO - codeparrot_training - Step 46503: {'lr': 6.0526582560750106e-06, 'samples': 23810048, 'steps': 46503, 'batch_loss/train': 0.8244126616045833} +12/29/2021 02:04:31 - INFO - codeparrot_training - Step 46504: {'lr': 6.049211148172168e-06, 'samples': 23810560, 'steps': 46504, 'batch_loss/train': 0.5802675224840641} +12/29/2021 02:04:42 - INFO - codeparrot_training - Step 46505: {'lr': 6.045765010125576e-06, 'samples': 23811072, 'steps': 46505, 'batch_loss/train': 0.7935726951109245} +12/29/2021 02:04:54 - INFO - codeparrot_training - Step 46506: {'lr': 6.04231984194889e-06, 'samples': 23811584, 'steps': 46506, 'batch_loss/train': 0.7608450115658343} +12/29/2021 02:05:05 - INFO - codeparrot_training - Step 46507: {'lr': 6.038875643655823e-06, 'samples': 23812096, 'steps': 46507, 'batch_loss/train': 0.8260759143158793} +12/29/2021 02:05:15 - INFO - codeparrot_training - Step 46508: {'lr': 6.035432415260056e-06, 'samples': 23812608, 'steps': 46508, 'batch_loss/train': 0.7235207429621369} +12/29/2021 02:05:28 - INFO - codeparrot_training - Step 46509: {'lr': 6.031990156775357e-06, 'samples': 23813120, 'steps': 46509, 'batch_loss/train': 0.7118728188797832} +12/29/2021 02:05:39 - INFO - codeparrot_training - Step 46510: {'lr': 6.028548868215272e-06, 'samples': 23813632, 'steps': 46510, 'batch_loss/train': 0.6526904201600701} +12/29/2021 02:05:49 - INFO - codeparrot_training - Step 46511: {'lr': 6.0251085495935945e-06, 'samples': 23814144, 'steps': 46511, 'batch_loss/train': 0.7916270764544606} +12/29/2021 02:06:00 - INFO - codeparrot_training - Step 46512: {'lr': 6.0216692009239795e-06, 'samples': 23814656, 'steps': 46512, 'batch_loss/train': 0.7087158495560288} +12/29/2021 02:06:13 - INFO - codeparrot_training - Step 46513: {'lr': 6.0182308222200274e-06, 'samples': 23815168, 'steps': 46513, 'batch_loss/train': 0.7310427273623645} +12/29/2021 02:06:23 - INFO - codeparrot_training - Step 46514: {'lr': 6.014793413495534e-06, 'samples': 23815680, 'steps': 46514, 'batch_loss/train': 0.7394919523503631} +12/29/2021 02:06:34 - INFO - codeparrot_training - Step 46515: {'lr': 6.011356974764043e-06, 'samples': 23816192, 'steps': 46515, 'batch_loss/train': 0.6547909241635352} +12/29/2021 02:06:46 - INFO - codeparrot_training - Step 46516: {'lr': 6.007921506039293e-06, 'samples': 23816704, 'steps': 46516, 'batch_loss/train': 0.6222384599968791} +12/29/2021 02:06:57 - INFO - codeparrot_training - Step 46517: {'lr': 6.004487007334913e-06, 'samples': 23817216, 'steps': 46517, 'batch_loss/train': 0.7353796781972051} +12/29/2021 02:07:08 - INFO - codeparrot_training - Step 46518: {'lr': 6.001053478664558e-06, 'samples': 23817728, 'steps': 46518, 'batch_loss/train': 0.72452257364057} +12/29/2021 02:07:20 - INFO - codeparrot_training - Step 46519: {'lr': 5.997620920041913e-06, 'samples': 23818240, 'steps': 46519, 'batch_loss/train': 0.7200478194281459} +12/29/2021 02:07:30 - INFO - codeparrot_training - Step 46520: {'lr': 5.9941893314805765e-06, 'samples': 23818752, 'steps': 46520, 'batch_loss/train': 0.6804830739274621} +12/29/2021 02:07:41 - INFO - codeparrot_training - Step 46521: {'lr': 5.9907587129942045e-06, 'samples': 23819264, 'steps': 46521, 'batch_loss/train': 0.7532029319554567} +12/29/2021 02:07:52 - INFO - codeparrot_training - Step 46522: {'lr': 5.987329064596425e-06, 'samples': 23819776, 'steps': 46522, 'batch_loss/train': 0.7948908022372052} +12/29/2021 02:08:05 - INFO - codeparrot_training - Step 46523: {'lr': 5.98390038630095e-06, 'samples': 23820288, 'steps': 46523, 'batch_loss/train': 0.6846754889702424} +12/29/2021 02:08:15 - INFO - codeparrot_training - Step 46524: {'lr': 5.980472678121296e-06, 'samples': 23820800, 'steps': 46524, 'batch_loss/train': 0.6885110780131072} +12/29/2021 02:08:26 - INFO - codeparrot_training - Step 46525: {'lr': 5.9770459400711464e-06, 'samples': 23821312, 'steps': 46525, 'batch_loss/train': 0.5922926403582096} +12/29/2021 02:08:38 - INFO - codeparrot_training - Step 46526: {'lr': 5.973620172164157e-06, 'samples': 23821824, 'steps': 46526, 'batch_loss/train': 0.6758197881281376} +12/29/2021 02:08:49 - INFO - codeparrot_training - Step 46527: {'lr': 5.9701953744138726e-06, 'samples': 23822336, 'steps': 46527, 'batch_loss/train': 0.7485631455201656} +12/29/2021 02:08:59 - INFO - codeparrot_training - Step 46528: {'lr': 5.966771546833949e-06, 'samples': 23822848, 'steps': 46528, 'batch_loss/train': 0.7625657003372908} +12/29/2021 02:09:11 - INFO - codeparrot_training - Step 46529: {'lr': 5.9633486894380414e-06, 'samples': 23823360, 'steps': 46529, 'batch_loss/train': 0.7642679126001894} +12/29/2021 02:09:22 - INFO - codeparrot_training - Step 46530: {'lr': 5.959926802239696e-06, 'samples': 23823872, 'steps': 46530, 'batch_loss/train': 0.7297056037932634} +12/29/2021 02:09:32 - INFO - codeparrot_training - Step 46531: {'lr': 5.956505885252539e-06, 'samples': 23824384, 'steps': 46531, 'batch_loss/train': 0.7247404521331191} +12/29/2021 02:09:43 - INFO - codeparrot_training - Step 46532: {'lr': 5.9530859384901435e-06, 'samples': 23824896, 'steps': 46532, 'batch_loss/train': 0.7579891355708241} +12/29/2021 02:09:56 - INFO - codeparrot_training - Step 46533: {'lr': 5.949666961966138e-06, 'samples': 23825408, 'steps': 46533, 'batch_loss/train': 0.6510486899642274} +12/29/2021 02:10:06 - INFO - codeparrot_training - Step 46534: {'lr': 5.946248955694122e-06, 'samples': 23825920, 'steps': 46534, 'batch_loss/train': 0.673755771946162} +12/29/2021 02:10:17 - INFO - codeparrot_training - Step 46535: {'lr': 5.942831919687697e-06, 'samples': 23826432, 'steps': 46535, 'batch_loss/train': 0.6699563353322446} +12/29/2021 02:10:30 - INFO - codeparrot_training - Step 46536: {'lr': 5.939415853960351e-06, 'samples': 23826944, 'steps': 46536, 'batch_loss/train': 0.6711962191620842} +12/29/2021 02:10:40 - INFO - codeparrot_training - Step 46537: {'lr': 5.936000758525767e-06, 'samples': 23827456, 'steps': 46537, 'batch_loss/train': 0.6519939270219766} +12/29/2021 02:10:51 - INFO - codeparrot_training - Step 46538: {'lr': 5.932586633397491e-06, 'samples': 23827968, 'steps': 46538, 'batch_loss/train': 0.7408841736614704} +12/29/2021 02:11:02 - INFO - codeparrot_training - Step 46539: {'lr': 5.929173478589067e-06, 'samples': 23828480, 'steps': 46539, 'batch_loss/train': 0.5259209295036271} +12/29/2021 02:11:14 - INFO - codeparrot_training - Step 46540: {'lr': 5.925761294114124e-06, 'samples': 23828992, 'steps': 46540, 'batch_loss/train': 0.7611669942270964} +12/29/2021 02:11:25 - INFO - codeparrot_training - Step 46541: {'lr': 5.922350079986205e-06, 'samples': 23829504, 'steps': 46541, 'batch_loss/train': 0.5583180077373981} +12/29/2021 02:11:35 - INFO - codeparrot_training - Step 46542: {'lr': 5.918939836218856e-06, 'samples': 23830016, 'steps': 46542, 'batch_loss/train': 0.6444703817833215} +12/29/2021 02:11:47 - INFO - codeparrot_training - Step 46543: {'lr': 5.915530562825622e-06, 'samples': 23830528, 'steps': 46543, 'batch_loss/train': 0.7430653502233326} +12/29/2021 02:11:58 - INFO - codeparrot_training - Step 46544: {'lr': 5.9121222598200745e-06, 'samples': 23831040, 'steps': 46544, 'batch_loss/train': 0.7583739114925265} +12/29/2021 02:12:09 - INFO - codeparrot_training - Step 46545: {'lr': 5.908714927215786e-06, 'samples': 23831552, 'steps': 46545, 'batch_loss/train': 0.8097171150147915} +12/29/2021 02:12:17 - INFO - codeparrot_training - Dataset epoch: 2 +12/29/2021 02:12:23 - INFO - codeparrot_training - Step 46546: {'lr': 5.905308565026274e-06, 'samples': 23832064, 'steps': 46546, 'batch_loss/train': 0.6446466716006398} +12/29/2021 02:12:34 - INFO - codeparrot_training - Step 46547: {'lr': 5.901903173265083e-06, 'samples': 23832576, 'steps': 46547, 'batch_loss/train': 0.7015944602899253} +12/29/2021 02:12:44 - INFO - codeparrot_training - Step 46548: {'lr': 5.898498751945758e-06, 'samples': 23833088, 'steps': 46548, 'batch_loss/train': 0.6435019322670996} +12/29/2021 02:12:55 - INFO - codeparrot_training - Step 46549: {'lr': 5.895095301081815e-06, 'samples': 23833600, 'steps': 46549, 'batch_loss/train': 0.7310878769494593} +12/29/2021 02:13:09 - INFO - codeparrot_training - Step 46550: {'lr': 5.891692820686828e-06, 'samples': 23834112, 'steps': 46550, 'batch_loss/train': 0.7807645443826914} +12/29/2021 02:13:19 - INFO - codeparrot_training - Step 46551: {'lr': 5.8882913107742856e-06, 'samples': 23834624, 'steps': 46551, 'batch_loss/train': 0.6658499822951853} +12/29/2021 02:13:30 - INFO - codeparrot_training - Step 46552: {'lr': 5.8848907713577595e-06, 'samples': 23835136, 'steps': 46552, 'batch_loss/train': 0.6555053456395399} +12/29/2021 02:13:42 - INFO - codeparrot_training - Step 46553: {'lr': 5.881491202450712e-06, 'samples': 23835648, 'steps': 46553, 'batch_loss/train': 0.643343451898545} +12/29/2021 02:13:53 - INFO - codeparrot_training - Step 46554: {'lr': 5.878092604066631e-06, 'samples': 23836160, 'steps': 46554, 'batch_loss/train': 0.6851258943788707} +12/29/2021 02:14:03 - INFO - codeparrot_training - Step 46555: {'lr': 5.874694976219147e-06, 'samples': 23836672, 'steps': 46555, 'batch_loss/train': 0.5125106011983007} +12/29/2021 02:14:16 - INFO - codeparrot_training - Step 46556: {'lr': 5.871298318921664e-06, 'samples': 23837184, 'steps': 46556, 'batch_loss/train': 0.7327632217202336} +12/29/2021 02:14:26 - INFO - codeparrot_training - Step 46557: {'lr': 5.8679026321877275e-06, 'samples': 23837696, 'steps': 46557, 'batch_loss/train': 0.6759601363446563} +12/29/2021 02:14:37 - INFO - codeparrot_training - Step 46558: {'lr': 5.864507916030826e-06, 'samples': 23838208, 'steps': 46558, 'batch_loss/train': 0.6564153367653489} +12/29/2021 02:14:51 - INFO - codeparrot_training - Step 46559: {'lr': 5.8611141704644485e-06, 'samples': 23838720, 'steps': 46559, 'batch_loss/train': 0.7766203552018851} +12/29/2021 02:15:01 - INFO - codeparrot_training - Step 46560: {'lr': 5.857721395502114e-06, 'samples': 23839232, 'steps': 46560, 'batch_loss/train': 0.6951513696694747} +12/29/2021 02:15:12 - INFO - codeparrot_training - Step 46561: {'lr': 5.854329591157309e-06, 'samples': 23839744, 'steps': 46561, 'batch_loss/train': 0.5892315013334155} +12/29/2021 02:15:23 - INFO - codeparrot_training - Step 46562: {'lr': 5.850938757443441e-06, 'samples': 23840256, 'steps': 46562, 'batch_loss/train': 0.7197529512923211} +12/29/2021 02:15:35 - INFO - codeparrot_training - Step 46563: {'lr': 5.847548894374111e-06, 'samples': 23840768, 'steps': 46563, 'batch_loss/train': 0.7384588662534952} +12/29/2021 02:15:46 - INFO - codeparrot_training - Step 46564: {'lr': 5.84416000196275e-06, 'samples': 23841280, 'steps': 46564, 'batch_loss/train': 0.7377502433955669} +12/29/2021 02:15:56 - INFO - codeparrot_training - Step 46565: {'lr': 5.840772080222767e-06, 'samples': 23841792, 'steps': 46565, 'batch_loss/train': 0.589922450017184} +12/29/2021 02:16:10 - INFO - codeparrot_training - Step 46566: {'lr': 5.837385129167705e-06, 'samples': 23842304, 'steps': 46566, 'batch_loss/train': 0.7954148184508085} +12/29/2021 02:16:21 - INFO - codeparrot_training - Step 46567: {'lr': 5.833999148811026e-06, 'samples': 23842816, 'steps': 46567, 'batch_loss/train': 0.6840092416387051} +12/29/2021 02:16:31 - INFO - codeparrot_training - Step 46568: {'lr': 5.830614139166163e-06, 'samples': 23843328, 'steps': 46568, 'batch_loss/train': 0.5792427103151567} +12/29/2021 02:16:44 - INFO - codeparrot_training - Step 46569: {'lr': 5.8272301002465774e-06, 'samples': 23843840, 'steps': 46569, 'batch_loss/train': 1.136985257267952} +12/29/2021 02:16:54 - INFO - codeparrot_training - Step 46570: {'lr': 5.823847032065732e-06, 'samples': 23844352, 'steps': 46570, 'batch_loss/train': 0.8020467665046453} +12/29/2021 02:17:05 - INFO - codeparrot_training - Step 46571: {'lr': 5.82046493463706e-06, 'samples': 23844864, 'steps': 46571, 'batch_loss/train': 0.8135793614201248} +12/29/2021 02:17:15 - INFO - codeparrot_training - Step 46572: {'lr': 5.817083807974022e-06, 'samples': 23845376, 'steps': 46572, 'batch_loss/train': 0.7617074807640165} +12/29/2021 02:17:28 - INFO - codeparrot_training - Step 46573: {'lr': 5.81370365209008e-06, 'samples': 23845888, 'steps': 46573, 'batch_loss/train': 0.6920874507632107} +12/29/2021 02:17:38 - INFO - codeparrot_training - Step 46574: {'lr': 5.81032446699864e-06, 'samples': 23846400, 'steps': 46574, 'batch_loss/train': 0.7071769190952182} +12/29/2021 02:17:49 - INFO - codeparrot_training - Step 46575: {'lr': 5.8069462527131356e-06, 'samples': 23846912, 'steps': 46575, 'batch_loss/train': 0.7547582592815161} +12/29/2021 02:18:03 - INFO - codeparrot_training - Step 46576: {'lr': 5.803569009247028e-06, 'samples': 23847424, 'steps': 46576, 'batch_loss/train': 0.691774868639186} +12/29/2021 02:18:14 - INFO - codeparrot_training - Step 46577: {'lr': 5.800192736613724e-06, 'samples': 23847936, 'steps': 46577, 'batch_loss/train': 0.7323031427804381} +12/29/2021 02:18:24 - INFO - codeparrot_training - Step 46578: {'lr': 5.796817434826656e-06, 'samples': 23848448, 'steps': 46578, 'batch_loss/train': 0.7620372767560184} +12/29/2021 02:18:36 - INFO - codeparrot_training - Step 46579: {'lr': 5.793443103899204e-06, 'samples': 23848960, 'steps': 46579, 'batch_loss/train': 0.6242999371606857} +12/29/2021 02:18:47 - INFO - codeparrot_training - Step 46580: {'lr': 5.7900697438448e-06, 'samples': 23849472, 'steps': 46580, 'batch_loss/train': 0.6736194572877139} +12/29/2021 02:18:57 - INFO - codeparrot_training - Step 46581: {'lr': 5.786697354676934e-06, 'samples': 23849984, 'steps': 46581, 'batch_loss/train': 0.6833337359130383} +12/29/2021 02:19:08 - INFO - codeparrot_training - Step 46582: {'lr': 5.783325936408901e-06, 'samples': 23850496, 'steps': 46582, 'batch_loss/train': 0.7497129030525684} +12/29/2021 02:19:20 - INFO - codeparrot_training - Step 46583: {'lr': 5.779955489054161e-06, 'samples': 23851008, 'steps': 46583, 'batch_loss/train': 0.6600721096619964} +12/29/2021 02:19:31 - INFO - codeparrot_training - Step 46584: {'lr': 5.776586012626123e-06, 'samples': 23851520, 'steps': 46584, 'batch_loss/train': 0.6950307926163077} +12/29/2021 02:19:41 - INFO - codeparrot_training - Step 46585: {'lr': 5.773217507138134e-06, 'samples': 23852032, 'steps': 46585, 'batch_loss/train': 0.6698815263807774} +12/29/2021 02:19:53 - INFO - codeparrot_training - Step 46586: {'lr': 5.76984997260363e-06, 'samples': 23852544, 'steps': 46586, 'batch_loss/train': 0.6991239651106298} +12/29/2021 02:20:04 - INFO - codeparrot_training - Step 46587: {'lr': 5.766483409035989e-06, 'samples': 23853056, 'steps': 46587, 'batch_loss/train': 0.7171855855267495} +12/29/2021 02:20:15 - INFO - codeparrot_training - Step 46588: {'lr': 5.76311781644856e-06, 'samples': 23853568, 'steps': 46588, 'batch_loss/train': 0.7140407087281346} +12/29/2021 02:20:29 - INFO - codeparrot_training - Step 46589: {'lr': 5.759753194854778e-06, 'samples': 23854080, 'steps': 46589, 'batch_loss/train': 0.7166671622544527} +12/29/2021 02:20:39 - INFO - codeparrot_training - Step 46590: {'lr': 5.7563895442680205e-06, 'samples': 23854592, 'steps': 46590, 'batch_loss/train': 0.757999227847904} +12/29/2021 02:20:50 - INFO - codeparrot_training - Step 46591: {'lr': 5.753026864701583e-06, 'samples': 23855104, 'steps': 46591, 'batch_loss/train': 0.7060967488214374} +12/29/2021 02:21:02 - INFO - codeparrot_training - Step 46592: {'lr': 5.749665156168926e-06, 'samples': 23855616, 'steps': 46592, 'batch_loss/train': 0.7092402022099122} +12/29/2021 02:21:13 - INFO - codeparrot_training - Step 46593: {'lr': 5.7463044186833736e-06, 'samples': 23856128, 'steps': 46593, 'batch_loss/train': 0.7523427354171872} +12/29/2021 02:21:24 - INFO - codeparrot_training - Step 46594: {'lr': 5.742944652258303e-06, 'samples': 23856640, 'steps': 46594, 'batch_loss/train': 0.6985855139791965} +12/29/2021 02:21:34 - INFO - codeparrot_training - Step 46595: {'lr': 5.739585856906981e-06, 'samples': 23857152, 'steps': 46595, 'batch_loss/train': 0.700181461405009} +12/29/2021 02:21:48 - INFO - codeparrot_training - Step 46596: {'lr': 5.736228032642926e-06, 'samples': 23857664, 'steps': 46596, 'batch_loss/train': 0.6996875312179327} +12/29/2021 02:21:59 - INFO - codeparrot_training - Step 46597: {'lr': 5.732871179479349e-06, 'samples': 23858176, 'steps': 46597, 'batch_loss/train': 0.6629366399720311} +12/29/2021 02:22:09 - INFO - codeparrot_training - Step 46598: {'lr': 5.729515297429655e-06, 'samples': 23858688, 'steps': 46598, 'batch_loss/train': 0.6807778589427471} +12/29/2021 02:22:22 - INFO - codeparrot_training - Step 46599: {'lr': 5.726160386507167e-06, 'samples': 23859200, 'steps': 46599, 'batch_loss/train': 0.6438652561046183} +12/29/2021 02:22:32 - INFO - codeparrot_training - Step 46600: {'lr': 5.722806446725238e-06, 'samples': 23859712, 'steps': 46600, 'batch_loss/train': 1.506801032461226} +12/29/2021 02:22:43 - INFO - codeparrot_training - Step 46601: {'lr': 5.719453478097214e-06, 'samples': 23860224, 'steps': 46601, 'batch_loss/train': 0.7056369869969785} +12/29/2021 02:22:55 - INFO - codeparrot_training - Step 46602: {'lr': 5.716101480636393e-06, 'samples': 23860736, 'steps': 46602, 'batch_loss/train': 0.7414629748091102} +12/29/2021 02:23:06 - INFO - codeparrot_training - Step 46603: {'lr': 5.712750454356097e-06, 'samples': 23861248, 'steps': 46603, 'batch_loss/train': 0.7919963006861508} +12/29/2021 02:23:17 - INFO - codeparrot_training - Step 46604: {'lr': 5.709400399269704e-06, 'samples': 23861760, 'steps': 46604, 'batch_loss/train': 0.7266445837449282} +12/29/2021 02:23:27 - INFO - codeparrot_training - Step 46605: {'lr': 5.706051315390453e-06, 'samples': 23862272, 'steps': 46605, 'batch_loss/train': 0.6976522243348882} +12/29/2021 02:23:41 - INFO - codeparrot_training - Step 46606: {'lr': 5.702703202731696e-06, 'samples': 23862784, 'steps': 46606, 'batch_loss/train': 0.7655040547251701} +12/29/2021 02:23:52 - INFO - codeparrot_training - Step 46607: {'lr': 5.6993560613068095e-06, 'samples': 23863296, 'steps': 46607, 'batch_loss/train': 0.680612213909626} +12/29/2021 02:24:03 - INFO - codeparrot_training - Step 46608: {'lr': 5.696009891128978e-06, 'samples': 23863808, 'steps': 46608, 'batch_loss/train': 0.6611613219138235} +12/29/2021 02:24:15 - INFO - codeparrot_training - Step 46609: {'lr': 5.6926646922115795e-06, 'samples': 23864320, 'steps': 46609, 'batch_loss/train': 0.6983957998454571} +12/29/2021 02:24:25 - INFO - codeparrot_training - Step 46610: {'lr': 5.68932046456791e-06, 'samples': 23864832, 'steps': 46610, 'batch_loss/train': 0.6788814230822027} +12/29/2021 02:24:36 - INFO - codeparrot_training - Step 46611: {'lr': 5.6859772082112625e-06, 'samples': 23865344, 'steps': 46611, 'batch_loss/train': 0.691991753410548} +12/29/2021 02:24:48 - INFO - codeparrot_training - Step 46612: {'lr': 5.6826349231549056e-06, 'samples': 23865856, 'steps': 46612, 'batch_loss/train': 0.632502106949687} +12/29/2021 02:24:59 - INFO - codeparrot_training - Step 46613: {'lr': 5.679293609412134e-06, 'samples': 23866368, 'steps': 46613, 'batch_loss/train': 0.6700055487453938} +12/29/2021 02:25:09 - INFO - codeparrot_training - Step 46614: {'lr': 5.675953266996242e-06, 'samples': 23866880, 'steps': 46614, 'batch_loss/train': 0.6784133762121201} +12/29/2021 02:25:20 - INFO - codeparrot_training - Step 46615: {'lr': 5.6726138959205256e-06, 'samples': 23867392, 'steps': 46615, 'batch_loss/train': 0.6558375880122185} +12/29/2021 02:25:34 - INFO - codeparrot_training - Step 46616: {'lr': 5.669275496198251e-06, 'samples': 23867904, 'steps': 46616, 'batch_loss/train': 0.7658596867695451} +12/29/2021 02:25:44 - INFO - codeparrot_training - Step 46617: {'lr': 5.66593806784263e-06, 'samples': 23868416, 'steps': 46617, 'batch_loss/train': 0.8230203147977591} +12/29/2021 02:25:55 - INFO - codeparrot_training - Step 46618: {'lr': 5.662601610867013e-06, 'samples': 23868928, 'steps': 46618, 'batch_loss/train': 0.8304295567795634} +12/29/2021 02:26:07 - INFO - codeparrot_training - Step 46619: {'lr': 5.65926612528464e-06, 'samples': 23869440, 'steps': 46619, 'batch_loss/train': 0.7730727707967162} +12/29/2021 02:26:18 - INFO - codeparrot_training - Step 46620: {'lr': 5.655931611108778e-06, 'samples': 23869952, 'steps': 46620, 'batch_loss/train': 0.7439729073084891} +12/29/2021 02:26:29 - INFO - codeparrot_training - Step 46621: {'lr': 5.65259806835261e-06, 'samples': 23870464, 'steps': 46621, 'batch_loss/train': 0.783963629975915} +12/29/2021 02:26:43 - INFO - codeparrot_training - Step 46622: {'lr': 5.649265497029516e-06, 'samples': 23870976, 'steps': 46622, 'batch_loss/train': 0.6539810246322304} +12/29/2021 02:26:54 - INFO - codeparrot_training - Step 46623: {'lr': 5.64593389715265e-06, 'samples': 23871488, 'steps': 46623, 'batch_loss/train': 0.7822325127199292} +12/29/2021 02:27:04 - INFO - codeparrot_training - Step 46624: {'lr': 5.642603268735308e-06, 'samples': 23872000, 'steps': 46624, 'batch_loss/train': 0.7607897100970149} +12/29/2021 02:27:15 - INFO - codeparrot_training - Step 46625: {'lr': 5.639273611790674e-06, 'samples': 23872512, 'steps': 46625, 'batch_loss/train': 0.7984127728268504} +12/29/2021 02:27:27 - INFO - codeparrot_training - Step 46626: {'lr': 5.635944926332043e-06, 'samples': 23873024, 'steps': 46626, 'batch_loss/train': 1.428286393173039} +12/29/2021 02:27:38 - INFO - codeparrot_training - Step 46627: {'lr': 5.632617212372626e-06, 'samples': 23873536, 'steps': 46627, 'batch_loss/train': 0.7054730127565563} +12/29/2021 02:27:49 - INFO - codeparrot_training - Step 46628: {'lr': 5.629290469925636e-06, 'samples': 23874048, 'steps': 46628, 'batch_loss/train': 0.7231184476986527} +12/29/2021 02:28:01 - INFO - codeparrot_training - Step 46629: {'lr': 5.625964699004338e-06, 'samples': 23874560, 'steps': 46629, 'batch_loss/train': 0.7050271527841687} +12/29/2021 02:28:11 - INFO - codeparrot_training - Step 46630: {'lr': 5.622639899621917e-06, 'samples': 23875072, 'steps': 46630, 'batch_loss/train': 0.8143537291907705} +12/29/2021 02:28:22 - INFO - codeparrot_training - Step 46631: {'lr': 5.619316071791641e-06, 'samples': 23875584, 'steps': 46631, 'batch_loss/train': 0.7377761102980003} +12/29/2021 02:28:33 - INFO - codeparrot_training - Step 46632: {'lr': 5.615993215526638e-06, 'samples': 23876096, 'steps': 46632, 'batch_loss/train': 0.7679095058701932} +12/29/2021 02:28:45 - INFO - codeparrot_training - Step 46633: {'lr': 5.61267133084023e-06, 'samples': 23876608, 'steps': 46633, 'batch_loss/train': 0.814855198841542} +12/29/2021 02:28:55 - INFO - codeparrot_training - Step 46634: {'lr': 5.609350417745518e-06, 'samples': 23877120, 'steps': 46634, 'batch_loss/train': 0.7745488351210952} +12/29/2021 02:29:06 - INFO - codeparrot_training - Step 46635: {'lr': 5.606030476255769e-06, 'samples': 23877632, 'steps': 46635, 'batch_loss/train': 0.7058696895837784} +12/29/2021 02:29:20 - INFO - codeparrot_training - Step 46636: {'lr': 5.602711506384139e-06, 'samples': 23878144, 'steps': 46636, 'batch_loss/train': 0.6542865601368248} +12/29/2021 02:29:31 - INFO - codeparrot_training - Step 46637: {'lr': 5.599393508143868e-06, 'samples': 23878656, 'steps': 46637, 'batch_loss/train': 0.6308319107629359} +12/29/2021 02:29:41 - INFO - codeparrot_training - Step 46638: {'lr': 5.59607648154814e-06, 'samples': 23879168, 'steps': 46638, 'batch_loss/train': 0.6730236241128296} +12/29/2021 02:29:54 - INFO - codeparrot_training - Step 46639: {'lr': 5.5927604266100825e-06, 'samples': 23879680, 'steps': 46639, 'batch_loss/train': 0.6961830812506378} +12/29/2021 02:30:05 - INFO - codeparrot_training - Step 46640: {'lr': 5.589445343342964e-06, 'samples': 23880192, 'steps': 46640, 'batch_loss/train': 0.6699628615169786} +12/29/2021 02:30:15 - INFO - codeparrot_training - Step 46641: {'lr': 5.5861312317599115e-06, 'samples': 23880704, 'steps': 46641, 'batch_loss/train': 0.7911574076861143} +12/29/2021 02:30:29 - INFO - codeparrot_training - Step 46642: {'lr': 5.582818091874137e-06, 'samples': 23881216, 'steps': 46642, 'batch_loss/train': 0.6752426992170513} +12/29/2021 02:30:40 - INFO - codeparrot_training - Step 46643: {'lr': 5.579505923698741e-06, 'samples': 23881728, 'steps': 46643, 'batch_loss/train': 0.8273814241401851} +12/29/2021 02:30:50 - INFO - codeparrot_training - Step 46644: {'lr': 5.576194727246964e-06, 'samples': 23882240, 'steps': 46644, 'batch_loss/train': 0.6360481394222006} +12/29/2021 02:31:01 - INFO - codeparrot_training - Step 46645: {'lr': 5.572884502531989e-06, 'samples': 23882752, 'steps': 46645, 'batch_loss/train': 0.8045755792409182} +12/29/2021 02:31:13 - INFO - codeparrot_training - Step 46646: {'lr': 5.569575249566888e-06, 'samples': 23883264, 'steps': 46646, 'batch_loss/train': 0.7351511525921524} +12/29/2021 02:31:24 - INFO - codeparrot_training - Step 46647: {'lr': 5.566266968364847e-06, 'samples': 23883776, 'steps': 46647, 'batch_loss/train': 0.6843841903610155} +12/29/2021 02:31:34 - INFO - codeparrot_training - Step 46648: {'lr': 5.5629596589390765e-06, 'samples': 23884288, 'steps': 46648, 'batch_loss/train': 0.6895507059525698} +12/29/2021 02:31:46 - INFO - codeparrot_training - Step 46649: {'lr': 5.559653321302649e-06, 'samples': 23884800, 'steps': 46649, 'batch_loss/train': 0.694293858949095} +12/29/2021 02:31:57 - INFO - codeparrot_training - Step 46650: {'lr': 5.556347955468777e-06, 'samples': 23885312, 'steps': 46650, 'batch_loss/train': 0.5905024642124772} +12/29/2021 02:32:08 - INFO - codeparrot_training - Step 46651: {'lr': 5.5530435614505326e-06, 'samples': 23885824, 'steps': 46651, 'batch_loss/train': 0.7160331532359123} +12/29/2021 02:32:21 - INFO - codeparrot_training - Step 46652: {'lr': 5.5497401392611e-06, 'samples': 23886336, 'steps': 46652, 'batch_loss/train': 0.6911960560828447} +12/29/2021 02:32:32 - INFO - codeparrot_training - Step 46653: {'lr': 5.546437688913608e-06, 'samples': 23886848, 'steps': 46653, 'batch_loss/train': 0.8207480348646641} +12/29/2021 02:32:43 - INFO - codeparrot_training - Step 46654: {'lr': 5.543136210421157e-06, 'samples': 23887360, 'steps': 46654, 'batch_loss/train': 0.673935842292849} +12/29/2021 02:32:53 - INFO - codeparrot_training - Step 46655: {'lr': 5.5398357037969025e-06, 'samples': 23887872, 'steps': 46655, 'batch_loss/train': 0.7666379939764738} +12/29/2021 02:33:05 - INFO - codeparrot_training - Step 46656: {'lr': 5.536536169053974e-06, 'samples': 23888384, 'steps': 46656, 'batch_loss/train': 0.7092340476810932} +12/29/2021 02:33:16 - INFO - codeparrot_training - Step 46657: {'lr': 5.533237606205472e-06, 'samples': 23888896, 'steps': 46657, 'batch_loss/train': 0.63266067719087} +12/29/2021 02:33:27 - INFO - codeparrot_training - Step 46658: {'lr': 5.529940015264468e-06, 'samples': 23889408, 'steps': 46658, 'batch_loss/train': 0.7322727255523205} +12/29/2021 02:33:39 - INFO - codeparrot_training - Step 46659: {'lr': 5.526643396244174e-06, 'samples': 23889920, 'steps': 46659, 'batch_loss/train': 0.5422572773823049} +12/29/2021 02:33:50 - INFO - codeparrot_training - Step 46660: {'lr': 5.523347749157609e-06, 'samples': 23890432, 'steps': 46660, 'batch_loss/train': 0.7511174771934748} +12/29/2021 02:34:00 - INFO - codeparrot_training - Step 46661: {'lr': 5.520053074017872e-06, 'samples': 23890944, 'steps': 46661, 'batch_loss/train': 0.7360445074737072} +12/29/2021 02:34:12 - INFO - codeparrot_training - Step 46662: {'lr': 5.5167593708381205e-06, 'samples': 23891456, 'steps': 46662, 'batch_loss/train': 0.7468252656981349} +12/29/2021 02:34:23 - INFO - codeparrot_training - Step 46663: {'lr': 5.513466639631426e-06, 'samples': 23891968, 'steps': 46663, 'batch_loss/train': 0.7265853271819651} +12/29/2021 02:34:34 - INFO - codeparrot_training - Step 46664: {'lr': 5.510174880410862e-06, 'samples': 23892480, 'steps': 46664, 'batch_loss/train': 0.8060842426493764} +12/29/2021 02:34:47 - INFO - codeparrot_training - Step 46665: {'lr': 5.506884093189529e-06, 'samples': 23892992, 'steps': 46665, 'batch_loss/train': 0.8055428927764297} +12/29/2021 02:34:58 - INFO - codeparrot_training - Step 46666: {'lr': 5.503594277980528e-06, 'samples': 23893504, 'steps': 46666, 'batch_loss/train': 0.6562614301219583} +12/29/2021 02:35:09 - INFO - codeparrot_training - Step 46667: {'lr': 5.5003054347968764e-06, 'samples': 23894016, 'steps': 46667, 'batch_loss/train': 0.7146757389418781} +12/29/2021 02:35:19 - INFO - codeparrot_training - Step 46668: {'lr': 5.4970175636517575e-06, 'samples': 23894528, 'steps': 46668, 'batch_loss/train': 0.6947829034179449} +12/29/2021 02:35:31 - INFO - codeparrot_training - Step 46669: {'lr': 5.493730664558106e-06, 'samples': 23895040, 'steps': 46669, 'batch_loss/train': 0.7092430735938251} +12/29/2021 02:35:42 - INFO - codeparrot_training - Step 46670: {'lr': 5.490444737529077e-06, 'samples': 23895552, 'steps': 46670, 'batch_loss/train': 0.7342807874083519} +12/29/2021 02:35:53 - INFO - codeparrot_training - Step 46671: {'lr': 5.487159782577744e-06, 'samples': 23896064, 'steps': 46671, 'batch_loss/train': 0.7269367570988834} +12/29/2021 02:36:05 - INFO - codeparrot_training - Step 46672: {'lr': 5.483875799717125e-06, 'samples': 23896576, 'steps': 46672, 'batch_loss/train': 0.6952040363103151} +12/29/2021 02:36:16 - INFO - codeparrot_training - Step 46673: {'lr': 5.4805927889602645e-06, 'samples': 23897088, 'steps': 46673, 'batch_loss/train': 0.7205853527411819} +12/29/2021 02:36:26 - INFO - codeparrot_training - Step 46674: {'lr': 5.4773107503202635e-06, 'samples': 23897600, 'steps': 46674, 'batch_loss/train': 0.8082593586295843} +12/29/2021 02:36:41 - INFO - codeparrot_training - Step 46675: {'lr': 5.4740296838101665e-06, 'samples': 23898112, 'steps': 46675, 'batch_loss/train': 0.8222400061786175} +12/29/2021 02:36:52 - INFO - codeparrot_training - Step 46676: {'lr': 5.470749589442936e-06, 'samples': 23898624, 'steps': 46676, 'batch_loss/train': 0.7954597459174693} +12/29/2021 02:37:02 - INFO - codeparrot_training - Step 46677: {'lr': 5.467470467231755e-06, 'samples': 23899136, 'steps': 46677, 'batch_loss/train': 1.1168636130169034} +12/29/2021 02:37:13 - INFO - codeparrot_training - Step 46678: {'lr': 5.464192317189531e-06, 'samples': 23899648, 'steps': 46678, 'batch_loss/train': 0.8610239154077135} +12/29/2021 02:37:25 - INFO - codeparrot_training - Step 46679: {'lr': 5.460915139329336e-06, 'samples': 23900160, 'steps': 46679, 'batch_loss/train': 0.7669082549400628} +12/29/2021 02:37:36 - INFO - codeparrot_training - Step 46680: {'lr': 5.457638933664244e-06, 'samples': 23900672, 'steps': 46680, 'batch_loss/train': 0.7073692139238119} +12/29/2021 02:37:46 - INFO - codeparrot_training - Step 46681: {'lr': 5.4543637002072155e-06, 'samples': 23901184, 'steps': 46681, 'batch_loss/train': 0.6746920151636004} +12/29/2021 02:37:59 - INFO - codeparrot_training - Step 46682: {'lr': 5.451089438971296e-06, 'samples': 23901696, 'steps': 46682, 'batch_loss/train': 0.7142969832057133} +12/29/2021 02:38:10 - INFO - codeparrot_training - Step 46683: {'lr': 5.447816149969559e-06, 'samples': 23902208, 'steps': 46683, 'batch_loss/train': 0.6504907542839646} +12/29/2021 02:38:20 - INFO - codeparrot_training - Step 46684: {'lr': 5.44454383321491e-06, 'samples': 23902720, 'steps': 46684, 'batch_loss/train': 0.5516189441550523} +12/29/2021 02:38:31 - INFO - codeparrot_training - Step 46685: {'lr': 5.44127248872045e-06, 'samples': 23903232, 'steps': 46685, 'batch_loss/train': 0.7051782049238682} +12/29/2021 02:38:43 - INFO - codeparrot_training - Step 46686: {'lr': 5.438002116499113e-06, 'samples': 23903744, 'steps': 46686, 'batch_loss/train': 0.723902135156095} +12/29/2021 02:38:54 - INFO - codeparrot_training - Step 46687: {'lr': 5.434732716563945e-06, 'samples': 23904256, 'steps': 46687, 'batch_loss/train': 0.7906933580525219} +12/29/2021 02:39:05 - INFO - codeparrot_training - Step 46688: {'lr': 5.431464288927962e-06, 'samples': 23904768, 'steps': 46688, 'batch_loss/train': 0.7642978671938181} +12/29/2021 02:39:17 - INFO - codeparrot_training - Step 46689: {'lr': 5.428196833604127e-06, 'samples': 23905280, 'steps': 46689, 'batch_loss/train': 0.6537321922369301} +12/29/2021 02:39:27 - INFO - codeparrot_training - Step 46690: {'lr': 5.4249303506054e-06, 'samples': 23905792, 'steps': 46690, 'batch_loss/train': 0.6820286328438669} +12/29/2021 02:39:38 - INFO - codeparrot_training - Step 46691: {'lr': 5.421664839944829e-06, 'samples': 23906304, 'steps': 46691, 'batch_loss/train': 0.6826387075707316} +12/29/2021 02:39:51 - INFO - codeparrot_training - Step 46692: {'lr': 5.418400301635374e-06, 'samples': 23906816, 'steps': 46692, 'batch_loss/train': 0.9192230235785246} +12/29/2021 02:40:01 - INFO - codeparrot_training - Step 46693: {'lr': 5.4151367356899964e-06, 'samples': 23907328, 'steps': 46693, 'batch_loss/train': 0.6249417765066028} +12/29/2021 02:40:12 - INFO - codeparrot_training - Step 46694: {'lr': 5.411874142121687e-06, 'samples': 23907840, 'steps': 46694, 'batch_loss/train': 0.7578366072848439} +12/29/2021 02:40:24 - INFO - codeparrot_training - Step 46695: {'lr': 5.4086125209434065e-06, 'samples': 23908352, 'steps': 46695, 'batch_loss/train': 0.7459809682331979} +12/29/2021 02:40:35 - INFO - codeparrot_training - Step 46696: {'lr': 5.405351872168118e-06, 'samples': 23908864, 'steps': 46696, 'batch_loss/train': 0.6647077505476773} +12/29/2021 02:40:45 - INFO - codeparrot_training - Step 46697: {'lr': 5.402092195808839e-06, 'samples': 23909376, 'steps': 46697, 'batch_loss/train': 0.5510806984966621} +12/29/2021 02:40:56 - INFO - codeparrot_training - Step 46698: {'lr': 5.398833491878446e-06, 'samples': 23909888, 'steps': 46698, 'batch_loss/train': 0.6862701531499624} +12/29/2021 02:41:09 - INFO - codeparrot_training - Step 46699: {'lr': 5.395575760389931e-06, 'samples': 23910400, 'steps': 46699, 'batch_loss/train': 0.813508449587971} +12/29/2021 02:41:19 - INFO - codeparrot_training - Step 46700: {'lr': 5.392319001356283e-06, 'samples': 23910912, 'steps': 46700, 'batch_loss/train': 0.70306331734173} +12/29/2021 02:41:30 - INFO - codeparrot_training - Step 46701: {'lr': 5.3890632147903795e-06, 'samples': 23911424, 'steps': 46701, 'batch_loss/train': 0.8062673814129084} +12/29/2021 02:41:42 - INFO - codeparrot_training - Step 46702: {'lr': 5.385808400705211e-06, 'samples': 23911936, 'steps': 46702, 'batch_loss/train': 0.8289758050232194} +12/29/2021 02:41:53 - INFO - codeparrot_training - Step 46703: {'lr': 5.382554559113712e-06, 'samples': 23912448, 'steps': 46703, 'batch_loss/train': 0.6265262723900378} +12/29/2021 02:42:03 - INFO - codeparrot_training - Step 46704: {'lr': 5.379301690028815e-06, 'samples': 23912960, 'steps': 46704, 'batch_loss/train': 0.6946486621163785} +12/29/2021 02:42:15 - INFO - codeparrot_training - Step 46705: {'lr': 5.376049793463428e-06, 'samples': 23913472, 'steps': 46705, 'batch_loss/train': 0.7288937270641327} +12/29/2021 02:42:26 - INFO - codeparrot_training - Step 46706: {'lr': 5.372798869430512e-06, 'samples': 23913984, 'steps': 46706, 'batch_loss/train': 0.7129444014281034} +12/29/2021 02:42:37 - INFO - codeparrot_training - Step 46707: {'lr': 5.369548917943001e-06, 'samples': 23914496, 'steps': 46707, 'batch_loss/train': 0.5858921430190094} +12/29/2021 02:42:47 - INFO - codeparrot_training - Step 46708: {'lr': 5.366299939013775e-06, 'samples': 23915008, 'steps': 46708, 'batch_loss/train': 0.7407867155270651} +12/29/2021 02:43:00 - INFO - codeparrot_training - Step 46709: {'lr': 5.363051932655793e-06, 'samples': 23915520, 'steps': 46709, 'batch_loss/train': 0.7330227429047227} +12/29/2021 02:43:10 - INFO - codeparrot_training - Step 46710: {'lr': 5.359804898881881e-06, 'samples': 23916032, 'steps': 46710, 'batch_loss/train': 0.7228787420317531} +12/29/2021 02:43:21 - INFO - codeparrot_training - Step 46711: {'lr': 5.356558837705056e-06, 'samples': 23916544, 'steps': 46711, 'batch_loss/train': 0.8102666204795241} +12/29/2021 02:43:34 - INFO - codeparrot_training - Step 46712: {'lr': 5.353313749138194e-06, 'samples': 23917056, 'steps': 46712, 'batch_loss/train': 0.7332541118375957} +12/29/2021 02:43:45 - INFO - codeparrot_training - Step 46713: {'lr': 5.350069633194149e-06, 'samples': 23917568, 'steps': 46713, 'batch_loss/train': 0.5952826165594161} +12/29/2021 02:43:55 - INFO - codeparrot_training - Step 46714: {'lr': 5.346826489885881e-06, 'samples': 23918080, 'steps': 46714, 'batch_loss/train': 0.6699930997565389} +12/29/2021 02:44:08 - INFO - codeparrot_training - Step 46715: {'lr': 5.343584319226241e-06, 'samples': 23918592, 'steps': 46715, 'batch_loss/train': 0.760097160586156} +12/29/2021 02:44:19 - INFO - codeparrot_training - Step 46716: {'lr': 5.340343121228136e-06, 'samples': 23919104, 'steps': 46716, 'batch_loss/train': 0.5845822533592582} +12/29/2021 02:44:29 - INFO - codeparrot_training - Step 46717: {'lr': 5.337102895904444e-06, 'samples': 23919616, 'steps': 46717, 'batch_loss/train': 0.7298959731124341} +12/29/2021 02:44:40 - INFO - codeparrot_training - Step 46718: {'lr': 5.333863643268045e-06, 'samples': 23920128, 'steps': 46718, 'batch_loss/train': 0.7215645834803581} +12/29/2021 02:44:52 - INFO - codeparrot_training - Step 46719: {'lr': 5.330625363331815e-06, 'samples': 23920640, 'steps': 46719, 'batch_loss/train': 0.7213813476264477} +12/29/2021 02:45:03 - INFO - codeparrot_training - Step 46720: {'lr': 5.327388056108634e-06, 'samples': 23921152, 'steps': 46720, 'batch_loss/train': 0.5735275589686353} +12/29/2021 02:45:13 - INFO - codeparrot_training - Step 46721: {'lr': 5.324151721611381e-06, 'samples': 23921664, 'steps': 46721, 'batch_loss/train': 0.7467954587191343} +12/29/2021 02:45:26 - INFO - codeparrot_training - Step 46722: {'lr': 5.320916359852934e-06, 'samples': 23922176, 'steps': 46722, 'batch_loss/train': 0.6528745554387569} +12/29/2021 02:45:37 - INFO - codeparrot_training - Step 46723: {'lr': 5.317681970846117e-06, 'samples': 23922688, 'steps': 46723, 'batch_loss/train': 0.6649202110711485} +12/29/2021 02:45:47 - INFO - codeparrot_training - Step 46724: {'lr': 5.314448554603807e-06, 'samples': 23923200, 'steps': 46724, 'batch_loss/train': 0.6904139583930373} +12/29/2021 02:46:00 - INFO - codeparrot_training - Step 46725: {'lr': 5.3112161111388556e-06, 'samples': 23923712, 'steps': 46725, 'batch_loss/train': 0.6867979457601905} +12/29/2021 02:46:10 - INFO - codeparrot_training - Step 46726: {'lr': 5.307984640464142e-06, 'samples': 23924224, 'steps': 46726, 'batch_loss/train': 0.6495155391748995} +12/29/2021 02:46:21 - INFO - codeparrot_training - Step 46727: {'lr': 5.304754142592461e-06, 'samples': 23924736, 'steps': 46727, 'batch_loss/train': 0.6599216380855069} +12/29/2021 02:46:31 - INFO - codeparrot_training - Step 46728: {'lr': 5.30152461753669e-06, 'samples': 23925248, 'steps': 46728, 'batch_loss/train': 0.6538081028265879} +12/29/2021 02:46:44 - INFO - codeparrot_training - Step 46729: {'lr': 5.298296065309682e-06, 'samples': 23925760, 'steps': 46729, 'batch_loss/train': 0.8189736793283373} +12/29/2021 02:46:54 - INFO - codeparrot_training - Step 46730: {'lr': 5.295068485924232e-06, 'samples': 23926272, 'steps': 46730, 'batch_loss/train': 0.6970726111903787} +12/29/2021 02:47:05 - INFO - codeparrot_training - Step 46731: {'lr': 5.291841879393189e-06, 'samples': 23926784, 'steps': 46731, 'batch_loss/train': 0.6613683978503104} +12/29/2021 02:47:18 - INFO - codeparrot_training - Step 46732: {'lr': 5.2886162457294055e-06, 'samples': 23927296, 'steps': 46732, 'batch_loss/train': 0.617215350328479} +12/29/2021 02:47:29 - INFO - codeparrot_training - Step 46733: {'lr': 5.285391584945676e-06, 'samples': 23927808, 'steps': 46733, 'batch_loss/train': 0.618141520768404} +12/29/2021 02:47:39 - INFO - codeparrot_training - Step 46734: {'lr': 5.282167897054823e-06, 'samples': 23928320, 'steps': 46734, 'batch_loss/train': 0.6969947358593345} +12/29/2021 02:47:51 - INFO - codeparrot_training - Step 46735: {'lr': 5.278945182069672e-06, 'samples': 23928832, 'steps': 46735, 'batch_loss/train': 0.930425884667784} +12/29/2021 02:48:02 - INFO - codeparrot_training - Step 46736: {'lr': 5.275723440003016e-06, 'samples': 23929344, 'steps': 46736, 'batch_loss/train': 0.6390919201076031} +12/29/2021 02:48:13 - INFO - codeparrot_training - Step 46737: {'lr': 5.272502670867707e-06, 'samples': 23929856, 'steps': 46737, 'batch_loss/train': 0.6580514639499597} +12/29/2021 02:48:23 - INFO - codeparrot_training - Step 46738: {'lr': 5.26928287467654e-06, 'samples': 23930368, 'steps': 46738, 'batch_loss/train': 0.7377248386619613} +12/29/2021 02:48:36 - INFO - codeparrot_training - Step 46739: {'lr': 5.266064051442226e-06, 'samples': 23930880, 'steps': 46739, 'batch_loss/train': 0.7471164495800622} +12/29/2021 02:48:46 - INFO - codeparrot_training - Step 46740: {'lr': 5.262846201177702e-06, 'samples': 23931392, 'steps': 46740, 'batch_loss/train': 0.4950397973298095} +12/29/2021 02:48:57 - INFO - codeparrot_training - Step 46741: {'lr': 5.259629323895676e-06, 'samples': 23931904, 'steps': 46741, 'batch_loss/train': 0.7767962636426091} +12/29/2021 02:49:10 - INFO - codeparrot_training - Step 46742: {'lr': 5.256413419608918e-06, 'samples': 23932416, 'steps': 46742, 'batch_loss/train': 0.6464419178664684} +12/29/2021 02:49:21 - INFO - codeparrot_training - Step 46743: {'lr': 5.25319848833028e-06, 'samples': 23932928, 'steps': 46743, 'batch_loss/train': 0.7040240857750177} +12/29/2021 02:49:31 - INFO - codeparrot_training - Step 46744: {'lr': 5.249984530072499e-06, 'samples': 23933440, 'steps': 46744, 'batch_loss/train': 0.6178882874082774} +12/29/2021 02:49:43 - INFO - codeparrot_training - Step 46745: {'lr': 5.2467715448483725e-06, 'samples': 23933952, 'steps': 46745, 'batch_loss/train': 0.7240029606036842} +12/29/2021 02:49:54 - INFO - codeparrot_training - Step 46746: {'lr': 5.2435595326706665e-06, 'samples': 23934464, 'steps': 46746, 'batch_loss/train': 0.6746165929362178} +12/29/2021 02:50:05 - INFO - codeparrot_training - Step 46747: {'lr': 5.2403484935521495e-06, 'samples': 23934976, 'steps': 46747, 'batch_loss/train': 0.7342611937783659} +12/29/2021 02:50:15 - INFO - codeparrot_training - Step 46748: {'lr': 5.237138427505589e-06, 'samples': 23935488, 'steps': 46748, 'batch_loss/train': 0.7432134905830026} +12/29/2021 02:50:27 - INFO - codeparrot_training - Step 46749: {'lr': 5.23392933454378e-06, 'samples': 23936000, 'steps': 46749, 'batch_loss/train': 0.8015781287103891} +12/29/2021 02:50:38 - INFO - codeparrot_training - Step 46750: {'lr': 5.230721214679407e-06, 'samples': 23936512, 'steps': 46750, 'batch_loss/train': 0.6610657079145312} +12/29/2021 02:50:49 - INFO - codeparrot_training - Step 46751: {'lr': 5.2275140679252656e-06, 'samples': 23937024, 'steps': 46751, 'batch_loss/train': 0.6570887635461986} +12/29/2021 02:51:02 - INFO - codeparrot_training - Step 46752: {'lr': 5.224307894294122e-06, 'samples': 23937536, 'steps': 46752, 'batch_loss/train': 0.7370776310563087} +12/29/2021 02:51:12 - INFO - codeparrot_training - Step 46753: {'lr': 5.221102693798718e-06, 'samples': 23938048, 'steps': 46753, 'batch_loss/train': 0.7315803468227386} +12/29/2021 02:51:23 - INFO - codeparrot_training - Step 46754: {'lr': 5.217898466451737e-06, 'samples': 23938560, 'steps': 46754, 'batch_loss/train': 0.7691934499889612} +12/29/2021 02:51:35 - INFO - codeparrot_training - Step 46755: {'lr': 5.2146952122660296e-06, 'samples': 23939072, 'steps': 46755, 'batch_loss/train': 0.7559731146320701} +12/29/2021 02:51:46 - INFO - codeparrot_training - Step 46756: {'lr': 5.211492931254225e-06, 'samples': 23939584, 'steps': 46756, 'batch_loss/train': 0.6534105360042304} +12/29/2021 02:51:56 - INFO - codeparrot_training - Step 46757: {'lr': 5.20829162342909e-06, 'samples': 23940096, 'steps': 46757, 'batch_loss/train': 0.8031455152668059} +12/29/2021 02:52:09 - INFO - codeparrot_training - Step 46758: {'lr': 5.205091288803365e-06, 'samples': 23940608, 'steps': 46758, 'batch_loss/train': 0.5697978881944437} +12/29/2021 02:52:20 - INFO - codeparrot_training - Step 46759: {'lr': 5.20189192738979e-06, 'samples': 23941120, 'steps': 46759, 'batch_loss/train': 0.8061553332954645} +12/29/2021 02:52:30 - INFO - codeparrot_training - Step 46760: {'lr': 5.1986935392010215e-06, 'samples': 23941632, 'steps': 46760, 'batch_loss/train': 0.6113572251051664} +12/29/2021 02:52:41 - INFO - codeparrot_training - Step 46761: {'lr': 5.195496124249855e-06, 'samples': 23942144, 'steps': 46761, 'batch_loss/train': 0.6755033910740167} +12/29/2021 02:52:53 - INFO - codeparrot_training - Step 46762: {'lr': 5.192299682548946e-06, 'samples': 23942656, 'steps': 46762, 'batch_loss/train': 0.7698778579942882} +12/29/2021 02:53:04 - INFO - codeparrot_training - Step 46763: {'lr': 5.1891042141110065e-06, 'samples': 23943168, 'steps': 46763, 'batch_loss/train': 0.6823506942018867} +12/29/2021 02:53:14 - INFO - codeparrot_training - Step 46764: {'lr': 5.185909718948778e-06, 'samples': 23943680, 'steps': 46764, 'batch_loss/train': 0.6613538681995124} +12/29/2021 02:53:27 - INFO - codeparrot_training - Step 46765: {'lr': 5.1827161970748885e-06, 'samples': 23944192, 'steps': 46765, 'batch_loss/train': 0.5516468775167596} +12/29/2021 02:53:37 - INFO - codeparrot_training - Step 46766: {'lr': 5.1795236485021046e-06, 'samples': 23944704, 'steps': 46766, 'batch_loss/train': 0.7092262096703053} +12/29/2021 02:53:48 - INFO - codeparrot_training - Step 46767: {'lr': 5.176332073243112e-06, 'samples': 23945216, 'steps': 46767, 'batch_loss/train': 0.7183510067407042} +12/29/2021 02:54:00 - INFO - codeparrot_training - Step 46768: {'lr': 5.173141471310566e-06, 'samples': 23945728, 'steps': 46768, 'batch_loss/train': 0.788479776121676} +12/29/2021 02:54:11 - INFO - codeparrot_training - Step 46769: {'lr': 5.169951842717152e-06, 'samples': 23946240, 'steps': 46769, 'batch_loss/train': 0.7563297059386969} +12/29/2021 02:54:21 - INFO - codeparrot_training - Step 46770: {'lr': 5.166763187475582e-06, 'samples': 23946752, 'steps': 46770, 'batch_loss/train': 0.7074837256222963} +12/29/2021 02:54:32 - INFO - codeparrot_training - Step 46771: {'lr': 5.163575505598511e-06, 'samples': 23947264, 'steps': 46771, 'batch_loss/train': 0.7592099388712086} +12/29/2021 02:54:45 - INFO - codeparrot_training - Step 46772: {'lr': 5.160388797098625e-06, 'samples': 23947776, 'steps': 46772, 'batch_loss/train': 0.6359245562925935} +12/29/2021 02:54:55 - INFO - codeparrot_training - Step 46773: {'lr': 5.15720306198858e-06, 'samples': 23948288, 'steps': 46773, 'batch_loss/train': 0.7168127377517521} +12/29/2021 02:55:06 - INFO - codeparrot_training - Step 46774: {'lr': 5.154018300281033e-06, 'samples': 23948800, 'steps': 46774, 'batch_loss/train': 0.7942498084157705} +12/29/2021 02:55:18 - INFO - codeparrot_training - Step 46775: {'lr': 5.150834511988694e-06, 'samples': 23949312, 'steps': 46775, 'batch_loss/train': 0.7235818281769753} +12/29/2021 02:55:29 - INFO - codeparrot_training - Step 46776: {'lr': 5.14765169712414e-06, 'samples': 23949824, 'steps': 46776, 'batch_loss/train': 0.6776129305362701} +12/29/2021 02:55:39 - INFO - codeparrot_training - Step 46777: {'lr': 5.144469855700107e-06, 'samples': 23950336, 'steps': 46777, 'batch_loss/train': 0.6749056214466691} +12/29/2021 02:55:52 - INFO - codeparrot_training - Step 46778: {'lr': 5.141288987729226e-06, 'samples': 23950848, 'steps': 46778, 'batch_loss/train': 0.7689493116922677} +12/29/2021 02:56:02 - INFO - codeparrot_training - Step 46779: {'lr': 5.13810909322407e-06, 'samples': 23951360, 'steps': 46779, 'batch_loss/train': 0.6798284472897649} +12/29/2021 02:56:13 - INFO - codeparrot_training - Step 46780: {'lr': 5.13493017219735e-06, 'samples': 23951872, 'steps': 46780, 'batch_loss/train': 0.7343373596668243} +12/29/2021 02:56:24 - INFO - codeparrot_training - Step 46781: {'lr': 5.131752224661723e-06, 'samples': 23952384, 'steps': 46781, 'batch_loss/train': 0.6718319905921817} +12/29/2021 02:56:37 - INFO - codeparrot_training - Step 46782: {'lr': 5.128575250629763e-06, 'samples': 23952896, 'steps': 46782, 'batch_loss/train': 0.7050471445545554} +12/29/2021 02:56:47 - INFO - codeparrot_training - Step 46783: {'lr': 5.125399250114126e-06, 'samples': 23953408, 'steps': 46783, 'batch_loss/train': 0.9222842380404472} +12/29/2021 02:56:58 - INFO - codeparrot_training - Step 46784: {'lr': 5.122224223127442e-06, 'samples': 23953920, 'steps': 46784, 'batch_loss/train': 1.2502910975017585} +12/29/2021 02:57:10 - INFO - codeparrot_training - Step 46785: {'lr': 5.119050169682337e-06, 'samples': 23954432, 'steps': 46785, 'batch_loss/train': 0.7414008444175124} +12/29/2021 02:57:21 - INFO - codeparrot_training - Step 46786: {'lr': 5.115877089791416e-06, 'samples': 23954944, 'steps': 46786, 'batch_loss/train': 0.7327363472431898} +12/29/2021 02:57:31 - INFO - codeparrot_training - Step 46787: {'lr': 5.112704983467331e-06, 'samples': 23955456, 'steps': 46787, 'batch_loss/train': 0.7066285137552768} +12/29/2021 02:57:43 - INFO - codeparrot_training - Step 46788: {'lr': 5.109533850722631e-06, 'samples': 23955968, 'steps': 46788, 'batch_loss/train': 0.7598563591018319} +12/29/2021 02:57:54 - INFO - codeparrot_training - Step 46789: {'lr': 5.106363691569971e-06, 'samples': 23956480, 'steps': 46789, 'batch_loss/train': 0.7099934946745634} +12/29/2021 02:58:04 - INFO - codeparrot_training - Step 46790: {'lr': 5.1031945060219795e-06, 'samples': 23956992, 'steps': 46790, 'batch_loss/train': 0.6229037962621078} +12/29/2021 02:58:15 - INFO - codeparrot_training - Step 46791: {'lr': 5.100026294091148e-06, 'samples': 23957504, 'steps': 46791, 'batch_loss/train': 0.7369984087417834} +12/29/2021 02:58:28 - INFO - codeparrot_training - Step 46792: {'lr': 5.0968590557901594e-06, 'samples': 23958016, 'steps': 46792, 'batch_loss/train': 0.7083963612094522} +12/29/2021 02:58:39 - INFO - codeparrot_training - Step 46793: {'lr': 5.093692791131643e-06, 'samples': 23958528, 'steps': 46793, 'batch_loss/train': 0.647058577160351} +12/29/2021 02:58:49 - INFO - codeparrot_training - Step 46794: {'lr': 5.090527500128089e-06, 'samples': 23959040, 'steps': 46794, 'batch_loss/train': 0.7371122662443668} +12/29/2021 02:59:01 - INFO - codeparrot_training - Step 46795: {'lr': 5.087363182792099e-06, 'samples': 23959552, 'steps': 46795, 'batch_loss/train': 1.4148138673044741} +12/29/2021 02:59:12 - INFO - codeparrot_training - Step 46796: {'lr': 5.0841998391363e-06, 'samples': 23960064, 'steps': 46796, 'batch_loss/train': 0.6756950113922358} +12/29/2021 02:59:23 - INFO - codeparrot_training - Step 46797: {'lr': 5.081037469173266e-06, 'samples': 23960576, 'steps': 46797, 'batch_loss/train': 0.6495742124971002} +12/29/2021 02:59:35 - INFO - codeparrot_training - Step 46798: {'lr': 5.077876072915516e-06, 'samples': 23961088, 'steps': 46798, 'batch_loss/train': 0.7844638000242412} +12/29/2021 02:59:46 - INFO - codeparrot_training - Step 46799: {'lr': 5.0747156503756774e-06, 'samples': 23961600, 'steps': 46799, 'batch_loss/train': 0.6823126235976815} +12/29/2021 02:59:57 - INFO - codeparrot_training - Step 46800: {'lr': 5.071556201566268e-06, 'samples': 23962112, 'steps': 46800, 'batch_loss/train': 0.6956045059487224} +12/29/2021 03:00:07 - INFO - codeparrot_training - Step 46801: {'lr': 5.06839772649989e-06, 'samples': 23962624, 'steps': 46801, 'batch_loss/train': 0.7156153172254562} +12/29/2021 03:00:19 - INFO - codeparrot_training - Step 46802: {'lr': 5.06524022518906e-06, 'samples': 23963136, 'steps': 46802, 'batch_loss/train': 0.7006361917592585} +12/29/2021 03:00:30 - INFO - codeparrot_training - Step 46803: {'lr': 5.062083697646353e-06, 'samples': 23963648, 'steps': 46803, 'batch_loss/train': 0.7029376965947449} +12/29/2021 03:00:41 - INFO - codeparrot_training - Step 46804: {'lr': 5.058928143884367e-06, 'samples': 23964160, 'steps': 46804, 'batch_loss/train': 0.7197096173185855} +12/29/2021 03:00:53 - INFO - codeparrot_training - Step 46805: {'lr': 5.055773563915539e-06, 'samples': 23964672, 'steps': 46805, 'batch_loss/train': 0.7363464459776878} +12/29/2021 03:01:03 - INFO - codeparrot_training - Step 46806: {'lr': 5.052619957752469e-06, 'samples': 23965184, 'steps': 46806, 'batch_loss/train': 0.6895359251648188} +12/29/2021 03:01:14 - INFO - codeparrot_training - Step 46807: {'lr': 5.049467325407731e-06, 'samples': 23965696, 'steps': 46807, 'batch_loss/train': 0.6904151090420783} +12/29/2021 03:01:27 - INFO - codeparrot_training - Step 46808: {'lr': 5.046315666893786e-06, 'samples': 23966208, 'steps': 46808, 'batch_loss/train': 0.7616470232605934} +12/29/2021 03:01:37 - INFO - codeparrot_training - Step 46809: {'lr': 5.043164982223181e-06, 'samples': 23966720, 'steps': 46809, 'batch_loss/train': 0.6939738811925054} +12/29/2021 03:01:48 - INFO - codeparrot_training - Step 46810: {'lr': 5.0400152714085156e-06, 'samples': 23967232, 'steps': 46810, 'batch_loss/train': 0.7255350803025067} +12/29/2021 03:01:59 - INFO - codeparrot_training - Step 46811: {'lr': 5.036866534462226e-06, 'samples': 23967744, 'steps': 46811, 'batch_loss/train': 0.7869552690535784} +12/29/2021 03:02:11 - INFO - codeparrot_training - Step 46812: {'lr': 5.033718771396883e-06, 'samples': 23968256, 'steps': 46812, 'batch_loss/train': 0.6593768103048205} +12/29/2021 03:02:21 - INFO - codeparrot_training - Step 46813: {'lr': 5.030571982224952e-06, 'samples': 23968768, 'steps': 46813, 'batch_loss/train': 0.6544525395147502} +12/29/2021 03:02:32 - INFO - codeparrot_training - Step 46814: {'lr': 5.027426166958976e-06, 'samples': 23969280, 'steps': 46814, 'batch_loss/train': 0.7475277716293931} +12/29/2021 03:02:44 - INFO - codeparrot_training - Step 46815: {'lr': 5.024281325611446e-06, 'samples': 23969792, 'steps': 46815, 'batch_loss/train': 0.6489277444779873} +12/29/2021 03:02:55 - INFO - codeparrot_training - Step 46816: {'lr': 5.021137458194908e-06, 'samples': 23970304, 'steps': 46816, 'batch_loss/train': 0.7599233742803335} +12/29/2021 03:03:05 - INFO - codeparrot_training - Step 46817: {'lr': 5.017994564721795e-06, 'samples': 23970816, 'steps': 46817, 'batch_loss/train': 0.7646086560562253} +12/29/2021 03:03:17 - INFO - codeparrot_training - Step 46818: {'lr': 5.014852645204654e-06, 'samples': 23971328, 'steps': 46818, 'batch_loss/train': 0.7372558675706387} +12/29/2021 03:03:28 - INFO - codeparrot_training - Step 46819: {'lr': 5.0117116996559465e-06, 'samples': 23971840, 'steps': 46819, 'batch_loss/train': 0.7640931280329823} +12/29/2021 03:03:39 - INFO - codeparrot_training - Step 46820: {'lr': 5.008571728088162e-06, 'samples': 23972352, 'steps': 46820, 'batch_loss/train': 0.6829312858171761} +12/29/2021 03:03:52 - INFO - codeparrot_training - Step 46821: {'lr': 5.005432730513821e-06, 'samples': 23972864, 'steps': 46821, 'batch_loss/train': 0.6837489046156406} +12/29/2021 03:04:02 - INFO - codeparrot_training - Step 46822: {'lr': 5.0022947069453265e-06, 'samples': 23973376, 'steps': 46822, 'batch_loss/train': 0.8005366390570998} +12/29/2021 03:04:13 - INFO - codeparrot_training - Step 46823: {'lr': 4.999157657395226e-06, 'samples': 23973888, 'steps': 46823, 'batch_loss/train': 0.6859450773335993} +12/29/2021 03:04:23 - INFO - codeparrot_training - Step 46824: {'lr': 4.9960215818759815e-06, 'samples': 23974400, 'steps': 46824, 'batch_loss/train': 1.5333657497540116} +12/29/2021 03:04:36 - INFO - codeparrot_training - Step 46825: {'lr': 4.9928864804000276e-06, 'samples': 23974912, 'steps': 46825, 'batch_loss/train': 0.6356637319549918} +12/29/2021 03:04:47 - INFO - codeparrot_training - Step 46826: {'lr': 4.989752352979826e-06, 'samples': 23975424, 'steps': 46826, 'batch_loss/train': 0.6147769116796553} +12/29/2021 03:04:57 - INFO - codeparrot_training - Step 46827: {'lr': 4.986619199627895e-06, 'samples': 23975936, 'steps': 46827, 'batch_loss/train': 0.7821107031777501} +12/29/2021 03:05:10 - INFO - codeparrot_training - Step 46828: {'lr': 4.983487020356614e-06, 'samples': 23976448, 'steps': 46828, 'batch_loss/train': 0.7180587965995073} +12/29/2021 03:05:21 - INFO - codeparrot_training - Step 46829: {'lr': 4.9803558151785e-06, 'samples': 23976960, 'steps': 46829, 'batch_loss/train': 0.7319239871576428} +12/29/2021 03:05:31 - INFO - codeparrot_training - Step 46830: {'lr': 4.9772255841059876e-06, 'samples': 23977472, 'steps': 46830, 'batch_loss/train': 0.7115958742797375} +12/29/2021 03:05:43 - INFO - codeparrot_training - Step 46831: {'lr': 4.974096327151512e-06, 'samples': 23977984, 'steps': 46831, 'batch_loss/train': 0.7372012166306376} +12/29/2021 03:05:54 - INFO - codeparrot_training - Step 46832: {'lr': 4.970968044327451e-06, 'samples': 23978496, 'steps': 46832, 'batch_loss/train': 0.6661312756477855} +12/29/2021 03:06:05 - INFO - codeparrot_training - Step 46833: {'lr': 4.967840735646379e-06, 'samples': 23979008, 'steps': 46833, 'batch_loss/train': 0.645197672303766} +12/29/2021 03:06:15 - INFO - codeparrot_training - Step 46834: {'lr': 4.964714401120618e-06, 'samples': 23979520, 'steps': 46834, 'batch_loss/train': 0.778684510383755} +12/29/2021 03:06:27 - INFO - codeparrot_training - Step 46835: {'lr': 4.961589040762604e-06, 'samples': 23980032, 'steps': 46835, 'batch_loss/train': 0.7101948782801628} +12/29/2021 03:06:38 - INFO - codeparrot_training - Step 46836: {'lr': 4.958464654584827e-06, 'samples': 23980544, 'steps': 46836, 'batch_loss/train': 0.6266470269765705} +12/29/2021 03:06:49 - INFO - codeparrot_training - Step 46837: {'lr': 4.955341242599637e-06, 'samples': 23981056, 'steps': 46837, 'batch_loss/train': 0.7005469088908285} +12/29/2021 03:07:01 - INFO - codeparrot_training - Step 46838: {'lr': 4.9522188048195245e-06, 'samples': 23981568, 'steps': 46838, 'batch_loss/train': 0.6837882399559021} +12/29/2021 03:07:12 - INFO - codeparrot_training - Step 46839: {'lr': 4.949097341256842e-06, 'samples': 23982080, 'steps': 46839, 'batch_loss/train': 0.6886977106332779} +12/29/2021 03:07:23 - INFO - codeparrot_training - Step 46840: {'lr': 4.945976851924022e-06, 'samples': 23982592, 'steps': 46840, 'batch_loss/train': 0.6864454234018922} +12/29/2021 03:07:35 - INFO - codeparrot_training - Step 46841: {'lr': 4.942857336833445e-06, 'samples': 23983104, 'steps': 46841, 'batch_loss/train': 0.7747632344253361} +12/29/2021 03:07:45 - INFO - codeparrot_training - Step 46842: {'lr': 4.9397387959976e-06, 'samples': 23983616, 'steps': 46842, 'batch_loss/train': 0.7999403569847345} +12/29/2021 03:07:56 - INFO - codeparrot_training - Step 46843: {'lr': 4.936621229428756e-06, 'samples': 23984128, 'steps': 46843, 'batch_loss/train': 0.7025643554516137} +12/29/2021 03:08:07 - INFO - codeparrot_training - Step 46844: {'lr': 4.933504637139402e-06, 'samples': 23984640, 'steps': 46844, 'batch_loss/train': 0.7016371302306652} +12/29/2021 03:08:19 - INFO - codeparrot_training - Step 46845: {'lr': 4.930389019141917e-06, 'samples': 23985152, 'steps': 46845, 'batch_loss/train': 0.6995283517753705} +12/29/2021 03:08:30 - INFO - codeparrot_training - Step 46846: {'lr': 4.927274375448626e-06, 'samples': 23985664, 'steps': 46846, 'batch_loss/train': 0.7624665712937713} +12/29/2021 03:08:41 - INFO - codeparrot_training - Step 46847: {'lr': 4.924160706071989e-06, 'samples': 23986176, 'steps': 46847, 'batch_loss/train': 0.6763610441703349} +12/29/2021 03:08:53 - INFO - codeparrot_training - Step 46848: {'lr': 4.92104801102436e-06, 'samples': 23986688, 'steps': 46848, 'batch_loss/train': 0.7041364493779838} +12/29/2021 03:09:04 - INFO - codeparrot_training - Step 46849: {'lr': 4.917936290318087e-06, 'samples': 23987200, 'steps': 46849, 'batch_loss/train': 0.7097812360152602} +12/29/2021 03:09:14 - INFO - codeparrot_training - Step 46850: {'lr': 4.9148255439655785e-06, 'samples': 23987712, 'steps': 46850, 'batch_loss/train': 0.7184916024561971} +12/29/2021 03:09:27 - INFO - codeparrot_training - Step 46851: {'lr': 4.911715771979159e-06, 'samples': 23988224, 'steps': 46851, 'batch_loss/train': 0.7457053600810468} +12/29/2021 03:09:37 - INFO - codeparrot_training - Step 46852: {'lr': 4.9086069743712325e-06, 'samples': 23988736, 'steps': 46852, 'batch_loss/train': 0.7196397380903363} +12/29/2021 03:09:48 - INFO - codeparrot_training - Step 46853: {'lr': 4.905499151154124e-06, 'samples': 23989248, 'steps': 46853, 'batch_loss/train': 0.7509152577258646} +12/29/2021 03:09:58 - INFO - codeparrot_training - Step 46854: {'lr': 4.902392302340214e-06, 'samples': 23989760, 'steps': 46854, 'batch_loss/train': 0.6631721092853695} +12/29/2021 03:10:11 - INFO - codeparrot_training - Step 46855: {'lr': 4.899286427941851e-06, 'samples': 23990272, 'steps': 46855, 'batch_loss/train': 0.8507082983851433} +12/29/2021 03:10:21 - INFO - codeparrot_training - Step 46856: {'lr': 4.896181527971389e-06, 'samples': 23990784, 'steps': 46856, 'batch_loss/train': 0.6706173825077713} +12/29/2021 03:10:32 - INFO - codeparrot_training - Step 46857: {'lr': 4.893077602441176e-06, 'samples': 23991296, 'steps': 46857, 'batch_loss/train': 0.8112341295927763} +12/29/2021 03:10:44 - INFO - codeparrot_training - Step 46858: {'lr': 4.889974651363482e-06, 'samples': 23991808, 'steps': 46858, 'batch_loss/train': 0.7646121839061379} +12/29/2021 03:10:55 - INFO - codeparrot_training - Step 46859: {'lr': 4.886872674750742e-06, 'samples': 23992320, 'steps': 46859, 'batch_loss/train': 0.7190771301393397} +12/29/2021 03:11:05 - INFO - codeparrot_training - Step 46860: {'lr': 4.88377167261525e-06, 'samples': 23992832, 'steps': 46860, 'batch_loss/train': 0.7765679024159908} +12/29/2021 03:11:18 - INFO - codeparrot_training - Step 46861: {'lr': 4.880671644969276e-06, 'samples': 23993344, 'steps': 46861, 'batch_loss/train': 0.7794700618833303} +12/29/2021 03:11:29 - INFO - codeparrot_training - Step 46862: {'lr': 4.877572591825252e-06, 'samples': 23993856, 'steps': 46862, 'batch_loss/train': 0.7084351303055882} +12/29/2021 03:11:39 - INFO - codeparrot_training - Step 46863: {'lr': 4.8744745131954485e-06, 'samples': 23994368, 'steps': 46863, 'batch_loss/train': 0.6744514214806259} +12/29/2021 03:11:51 - INFO - codeparrot_training - Step 46864: {'lr': 4.871377409092159e-06, 'samples': 23994880, 'steps': 46864, 'batch_loss/train': 0.6691917679854669} +12/29/2021 03:12:02 - INFO - codeparrot_training - Step 46865: {'lr': 4.868281279527708e-06, 'samples': 23995392, 'steps': 46865, 'batch_loss/train': 0.797234246507287} +12/29/2021 03:12:13 - INFO - codeparrot_training - Step 46866: {'lr': 4.8651861245144206e-06, 'samples': 23995904, 'steps': 46866, 'batch_loss/train': 0.7558006262406707} +12/29/2021 03:12:23 - INFO - codeparrot_training - Step 46867: {'lr': 4.86209194406459e-06, 'samples': 23996416, 'steps': 46867, 'batch_loss/train': 0.7716923881089315} +12/29/2021 03:12:36 - INFO - codeparrot_training - Step 46868: {'lr': 4.85899873819054e-06, 'samples': 23996928, 'steps': 46868, 'batch_loss/train': 0.736548843793571} +12/29/2021 03:12:47 - INFO - codeparrot_training - Step 46869: {'lr': 4.855906506904512e-06, 'samples': 23997440, 'steps': 46869, 'batch_loss/train': 0.6777945334324613} +12/29/2021 03:12:57 - INFO - codeparrot_training - Step 46870: {'lr': 4.852815250218856e-06, 'samples': 23997952, 'steps': 46870, 'batch_loss/train': 0.7607456743717194} +12/29/2021 03:13:10 - INFO - codeparrot_training - Step 46871: {'lr': 4.849724968145869e-06, 'samples': 23998464, 'steps': 46871, 'batch_loss/train': 0.7772658041212708} +12/29/2021 03:13:20 - INFO - codeparrot_training - Step 46872: {'lr': 4.8466356606977625e-06, 'samples': 23998976, 'steps': 46872, 'batch_loss/train': 0.7782066576182842} +12/29/2021 03:13:31 - INFO - codeparrot_training - Step 46873: {'lr': 4.843547327886888e-06, 'samples': 23999488, 'steps': 46873, 'batch_loss/train': 0.7037001738208346} +12/29/2021 03:13:43 - INFO - codeparrot_training - Step 46874: {'lr': 4.840459969725514e-06, 'samples': 24000000, 'steps': 46874, 'batch_loss/train': 0.5516919377841987} +12/29/2021 03:13:54 - INFO - codeparrot_training - Step 46875: {'lr': 4.837373586225907e-06, 'samples': 24000512, 'steps': 46875, 'batch_loss/train': 0.7685356182046235} +12/29/2021 03:14:04 - INFO - codeparrot_training - Step 46876: {'lr': 4.834288177400309e-06, 'samples': 24001024, 'steps': 46876, 'batch_loss/train': 0.748938157223165} +12/29/2021 03:14:15 - INFO - codeparrot_training - Step 46877: {'lr': 4.831203743261014e-06, 'samples': 24001536, 'steps': 46877, 'batch_loss/train': 0.7403732948005199} +12/29/2021 03:14:28 - INFO - codeparrot_training - Step 46878: {'lr': 4.828120283820292e-06, 'samples': 24002048, 'steps': 46878, 'batch_loss/train': 0.7344829358626157} +12/29/2021 03:14:38 - INFO - codeparrot_training - Step 46879: {'lr': 4.825037799090382e-06, 'samples': 24002560, 'steps': 46879, 'batch_loss/train': 0.756141773192212} +12/29/2021 03:14:49 - INFO - codeparrot_training - Step 46880: {'lr': 4.821956289083551e-06, 'samples': 24003072, 'steps': 46880, 'batch_loss/train': 0.7671863930299878} +12/29/2021 03:15:01 - INFO - codeparrot_training - Step 46881: {'lr': 4.8188757538120405e-06, 'samples': 24003584, 'steps': 46881, 'batch_loss/train': 0.7881047707051039} +12/29/2021 03:15:12 - INFO - codeparrot_training - Step 46882: {'lr': 4.815796193288091e-06, 'samples': 24004096, 'steps': 46882, 'batch_loss/train': 0.7162752067670226} +12/29/2021 03:15:23 - INFO - codeparrot_training - Step 46883: {'lr': 4.81271760752397e-06, 'samples': 24004608, 'steps': 46883, 'batch_loss/train': 0.852236564271152} +12/29/2021 03:15:36 - INFO - codeparrot_training - Step 46884: {'lr': 4.809639996531917e-06, 'samples': 24005120, 'steps': 46884, 'batch_loss/train': 0.4655806990340352} +12/29/2021 03:15:46 - INFO - codeparrot_training - Step 46885: {'lr': 4.806563360324173e-06, 'samples': 24005632, 'steps': 46885, 'batch_loss/train': 0.7897071307525039} +12/29/2021 03:15:57 - INFO - codeparrot_training - Step 46886: {'lr': 4.803487698912923e-06, 'samples': 24006144, 'steps': 46886, 'batch_loss/train': 0.8770369396079332} +12/29/2021 03:16:08 - INFO - codeparrot_training - Step 46887: {'lr': 4.800413012310406e-06, 'samples': 24006656, 'steps': 46887, 'batch_loss/train': 0.696841552387923} +12/29/2021 03:16:20 - INFO - codeparrot_training - Step 46888: {'lr': 4.797339300528891e-06, 'samples': 24007168, 'steps': 46888, 'batch_loss/train': 1.0314320367760956} +12/29/2021 03:16:30 - INFO - codeparrot_training - Step 46889: {'lr': 4.794266563580563e-06, 'samples': 24007680, 'steps': 46889, 'batch_loss/train': 1.0236200061626732} +12/29/2021 03:16:41 - INFO - codeparrot_training - Step 46890: {'lr': 4.7911948014776606e-06, 'samples': 24008192, 'steps': 46890, 'batch_loss/train': 0.6665377243189141} +12/29/2021 03:16:53 - INFO - codeparrot_training - Step 46891: {'lr': 4.788124014232342e-06, 'samples': 24008704, 'steps': 46891, 'batch_loss/train': 0.7297729328274727} +12/29/2021 03:17:04 - INFO - codeparrot_training - Step 46892: {'lr': 4.785054201856876e-06, 'samples': 24009216, 'steps': 46892, 'batch_loss/train': 0.7204521866515279} +12/29/2021 03:17:15 - INFO - codeparrot_training - Step 46893: {'lr': 4.781985364363445e-06, 'samples': 24009728, 'steps': 46893, 'batch_loss/train': 0.814561880659312} +12/29/2021 03:17:27 - INFO - codeparrot_training - Step 46894: {'lr': 4.7789175017642905e-06, 'samples': 24010240, 'steps': 46894, 'batch_loss/train': 0.6717323162592947} +12/29/2021 03:17:38 - INFO - codeparrot_training - Step 46895: {'lr': 4.775850614071486e-06, 'samples': 24010752, 'steps': 46895, 'batch_loss/train': 0.7851888127624989} +12/29/2021 03:17:49 - INFO - codeparrot_training - Step 46896: {'lr': 4.772784701297356e-06, 'samples': 24011264, 'steps': 46896, 'batch_loss/train': 0.8140816632658243} +12/29/2021 03:17:59 - INFO - codeparrot_training - Step 46897: {'lr': 4.7697197634540555e-06, 'samples': 24011776, 'steps': 46897, 'batch_loss/train': 0.7318855365738273} +12/29/2021 03:18:12 - INFO - codeparrot_training - Step 46898: {'lr': 4.766655800553688e-06, 'samples': 24012288, 'steps': 46898, 'batch_loss/train': 0.7327809575945139} +12/29/2021 03:18:22 - INFO - codeparrot_training - Step 46899: {'lr': 4.763592812608547e-06, 'samples': 24012800, 'steps': 46899, 'batch_loss/train': 0.6586694438010454} +12/29/2021 03:18:33 - INFO - codeparrot_training - Step 46900: {'lr': 4.760530799630791e-06, 'samples': 24013312, 'steps': 46900, 'batch_loss/train': 0.7507996172644198} +12/29/2021 03:18:45 - INFO - codeparrot_training - Step 46901: {'lr': 4.7574697616325215e-06, 'samples': 24013824, 'steps': 46901, 'batch_loss/train': 0.7138861711136997} +12/29/2021 03:18:56 - INFO - codeparrot_training - Step 46902: {'lr': 4.75440969862595e-06, 'samples': 24014336, 'steps': 46902, 'batch_loss/train': 0.7058553980314173} +12/29/2021 03:19:06 - INFO - codeparrot_training - Step 46903: {'lr': 4.751350610623261e-06, 'samples': 24014848, 'steps': 46903, 'batch_loss/train': 0.719870962202549} +12/29/2021 03:19:18 - INFO - codeparrot_training - Step 46904: {'lr': 4.748292497636614e-06, 'samples': 24015360, 'steps': 46904, 'batch_loss/train': 0.7473260283004493} +12/29/2021 03:19:29 - INFO - codeparrot_training - Step 46905: {'lr': 4.745235359678135e-06, 'samples': 24015872, 'steps': 46905, 'batch_loss/train': 0.7329400068847463} +12/29/2021 03:19:40 - INFO - codeparrot_training - Step 46906: {'lr': 4.742179196759983e-06, 'samples': 24016384, 'steps': 46906, 'batch_loss/train': 0.704409662168473} +12/29/2021 03:19:52 - INFO - codeparrot_training - Step 46907: {'lr': 4.739124008894341e-06, 'samples': 24016896, 'steps': 46907, 'batch_loss/train': 0.7246380220167339} +12/29/2021 03:20:03 - INFO - codeparrot_training - Step 46908: {'lr': 4.73606979609334e-06, 'samples': 24017408, 'steps': 46908, 'batch_loss/train': 0.7419342123903334} +12/29/2021 03:20:14 - INFO - codeparrot_training - Step 46909: {'lr': 4.73301655836908e-06, 'samples': 24017920, 'steps': 46909, 'batch_loss/train': 0.8083935417234898} +12/29/2021 03:20:24 - INFO - codeparrot_training - Step 46910: {'lr': 4.729964295733774e-06, 'samples': 24018432, 'steps': 46910, 'batch_loss/train': 0.7619129605591297} +12/29/2021 03:20:37 - INFO - codeparrot_training - Step 46911: {'lr': 4.726913008199524e-06, 'samples': 24018944, 'steps': 46911, 'batch_loss/train': 0.8177265804260969} +12/29/2021 03:20:47 - INFO - codeparrot_training - Step 46912: {'lr': 4.7238626957784306e-06, 'samples': 24019456, 'steps': 46912, 'batch_loss/train': 0.7145504420623183} +12/29/2021 03:20:58 - INFO - codeparrot_training - Step 46913: {'lr': 4.7208133584826505e-06, 'samples': 24019968, 'steps': 46913, 'batch_loss/train': 0.8058570944704115} +12/29/2021 03:21:11 - INFO - codeparrot_training - Step 46914: {'lr': 4.717764996324314e-06, 'samples': 24020480, 'steps': 46914, 'batch_loss/train': 0.6729938853532076} +12/29/2021 03:21:21 - INFO - codeparrot_training - Step 46915: {'lr': 4.714717609315522e-06, 'samples': 24020992, 'steps': 46915, 'batch_loss/train': 0.8158202990889549} +12/29/2021 03:21:32 - INFO - codeparrot_training - Step 46916: {'lr': 4.711671197468403e-06, 'samples': 24021504, 'steps': 46916, 'batch_loss/train': 0.7749120723456144} +12/29/2021 03:21:44 - INFO - codeparrot_training - Step 46917: {'lr': 4.70862576079506e-06, 'samples': 24022016, 'steps': 46917, 'batch_loss/train': 0.6997431670315564} +12/29/2021 03:21:55 - INFO - codeparrot_training - Step 46918: {'lr': 4.705581299307593e-06, 'samples': 24022528, 'steps': 46918, 'batch_loss/train': 0.8007563222199678} +12/29/2021 03:22:05 - INFO - codeparrot_training - Step 46919: {'lr': 4.702537813018132e-06, 'samples': 24023040, 'steps': 46919, 'batch_loss/train': 0.7451327508315444} +12/29/2021 03:22:16 - INFO - codeparrot_training - Step 46920: {'lr': 4.69949530193875e-06, 'samples': 24023552, 'steps': 46920, 'batch_loss/train': 0.8323976192623377} +12/29/2021 03:22:28 - INFO - codeparrot_training - Step 46921: {'lr': 4.696453766081549e-06, 'samples': 24024064, 'steps': 46921, 'batch_loss/train': 0.6502983670216054} +12/29/2021 03:22:39 - INFO - codeparrot_training - Step 46922: {'lr': 4.6934132054586296e-06, 'samples': 24024576, 'steps': 46922, 'batch_loss/train': 0.7392147481441498} +12/29/2021 03:22:49 - INFO - codeparrot_training - Step 46923: {'lr': 4.690373620082095e-06, 'samples': 24025088, 'steps': 46923, 'batch_loss/train': 0.6281072865240276} +12/29/2021 03:23:02 - INFO - codeparrot_training - Step 46924: {'lr': 4.687335009963961e-06, 'samples': 24025600, 'steps': 46924, 'batch_loss/train': 0.7101561052259058} +12/29/2021 03:23:12 - INFO - codeparrot_training - Step 46925: {'lr': 4.684297375116386e-06, 'samples': 24026112, 'steps': 46925, 'batch_loss/train': 0.6606370112858713} +12/29/2021 03:23:23 - INFO - codeparrot_training - Step 46926: {'lr': 4.681260715551416e-06, 'samples': 24026624, 'steps': 46926, 'batch_loss/train': 0.7104036207310855} +12/29/2021 03:23:35 - INFO - codeparrot_training - Step 46927: {'lr': 4.6782250312811245e-06, 'samples': 24027136, 'steps': 46927, 'batch_loss/train': 0.7269873498007655} +12/29/2021 03:23:46 - INFO - codeparrot_training - Step 46928: {'lr': 4.675190322317558e-06, 'samples': 24027648, 'steps': 46928, 'batch_loss/train': 0.7822473747655749} +12/29/2021 03:23:57 - INFO - codeparrot_training - Step 46929: {'lr': 4.672156588672844e-06, 'samples': 24028160, 'steps': 46929, 'batch_loss/train': 0.7944977944716811} +12/29/2021 03:24:07 - INFO - codeparrot_training - Step 46930: {'lr': 4.6691238303590015e-06, 'samples': 24028672, 'steps': 46930, 'batch_loss/train': 0.5030607416993007} +12/29/2021 03:24:20 - INFO - codeparrot_training - Step 46931: {'lr': 4.666092047388048e-06, 'samples': 24029184, 'steps': 46931, 'batch_loss/train': 0.7276636427268386} +12/29/2021 03:24:31 - INFO - codeparrot_training - Step 46932: {'lr': 4.663061239772115e-06, 'samples': 24029696, 'steps': 46932, 'batch_loss/train': 0.7253453517332673} +12/29/2021 03:24:41 - INFO - codeparrot_training - Step 46933: {'lr': 4.660031407523191e-06, 'samples': 24030208, 'steps': 46933, 'batch_loss/train': 0.8054408659227192} +12/29/2021 03:24:53 - INFO - codeparrot_training - Step 46934: {'lr': 4.65700255065335e-06, 'samples': 24030720, 'steps': 46934, 'batch_loss/train': 0.7130867121741176} +12/29/2021 03:25:04 - INFO - codeparrot_training - Step 46935: {'lr': 4.653974669174638e-06, 'samples': 24031232, 'steps': 46935, 'batch_loss/train': 0.7605660003609955} +12/29/2021 03:25:15 - INFO - codeparrot_training - Step 46936: {'lr': 4.650947763099101e-06, 'samples': 24031744, 'steps': 46936, 'batch_loss/train': 0.7006240841001272} +12/29/2021 03:25:27 - INFO - codeparrot_training - Step 46937: {'lr': 4.647921832438729e-06, 'samples': 24032256, 'steps': 46937, 'batch_loss/train': 0.6825501807034016} +12/29/2021 03:25:37 - INFO - codeparrot_training - Step 46938: {'lr': 4.644896877205596e-06, 'samples': 24032768, 'steps': 46938, 'batch_loss/train': 0.8169473155867308} +12/29/2021 03:25:48 - INFO - codeparrot_training - Step 46939: {'lr': 4.641872897411665e-06, 'samples': 24033280, 'steps': 46939, 'batch_loss/train': 0.794648420996964} +12/29/2021 03:25:59 - INFO - codeparrot_training - Step 46940: {'lr': 4.638849893069064e-06, 'samples': 24033792, 'steps': 46940, 'batch_loss/train': 0.6690927958115935} +12/29/2021 03:26:11 - INFO - codeparrot_training - Step 46941: {'lr': 4.635827864189729e-06, 'samples': 24034304, 'steps': 46941, 'batch_loss/train': 0.7581865731626749} +12/29/2021 03:26:21 - INFO - codeparrot_training - Step 46942: {'lr': 4.632806810785706e-06, 'samples': 24034816, 'steps': 46942, 'batch_loss/train': 0.7472189799882472} +12/29/2021 03:26:32 - INFO - codeparrot_training - Step 46943: {'lr': 4.6297867328689566e-06, 'samples': 24035328, 'steps': 46943, 'batch_loss/train': 0.7344433418475091} +12/29/2021 03:26:45 - INFO - codeparrot_training - Step 46944: {'lr': 4.626767630451556e-06, 'samples': 24035840, 'steps': 46944, 'batch_loss/train': 0.7515514697879553} +12/29/2021 03:26:56 - INFO - codeparrot_training - Step 46945: {'lr': 4.6237495035454934e-06, 'samples': 24036352, 'steps': 46945, 'batch_loss/train': 0.48915063275489956} +12/29/2021 03:27:06 - INFO - codeparrot_training - Step 46946: {'lr': 4.620732352162732e-06, 'samples': 24036864, 'steps': 46946, 'batch_loss/train': 0.675732898293063} +12/29/2021 03:27:18 - INFO - codeparrot_training - Step 46947: {'lr': 4.6177161763152895e-06, 'samples': 24037376, 'steps': 46947, 'batch_loss/train': 0.7427357165142894} +12/29/2021 03:27:29 - INFO - codeparrot_training - Step 46948: {'lr': 4.614700976015129e-06, 'samples': 24037888, 'steps': 46948, 'batch_loss/train': 0.6149739188840613} +12/29/2021 03:27:40 - INFO - codeparrot_training - Step 46949: {'lr': 4.6116867512743245e-06, 'samples': 24038400, 'steps': 46949, 'batch_loss/train': 0.7907123158220202} +12/29/2021 03:27:52 - INFO - codeparrot_training - Step 46950: {'lr': 4.6086735021047266e-06, 'samples': 24038912, 'steps': 46950, 'batch_loss/train': 0.5517462189309299} +12/29/2021 03:28:03 - INFO - codeparrot_training - Step 46951: {'lr': 4.605661228518438e-06, 'samples': 24039424, 'steps': 46951, 'batch_loss/train': 0.7556305108591914} +12/29/2021 03:28:13 - INFO - codeparrot_training - Step 46952: {'lr': 4.602649930527392e-06, 'samples': 24039936, 'steps': 46952, 'batch_loss/train': 0.6833119327202439} +12/29/2021 03:28:24 - INFO - codeparrot_training - Step 46953: {'lr': 4.599639608143524e-06, 'samples': 24040448, 'steps': 46953, 'batch_loss/train': 0.852120999712497} +12/29/2021 03:28:37 - INFO - codeparrot_training - Step 46954: {'lr': 4.596630261378798e-06, 'samples': 24040960, 'steps': 46954, 'batch_loss/train': 0.685736624058336} +12/29/2021 03:28:47 - INFO - codeparrot_training - Step 46955: {'lr': 4.593621890245258e-06, 'samples': 24041472, 'steps': 46955, 'batch_loss/train': 0.7921067057177424} +12/29/2021 03:28:58 - INFO - codeparrot_training - Step 46956: {'lr': 4.590614494754813e-06, 'samples': 24041984, 'steps': 46956, 'batch_loss/train': 0.7545939953997731} +12/29/2021 03:29:10 - INFO - codeparrot_training - Step 46957: {'lr': 4.587608074919397e-06, 'samples': 24042496, 'steps': 46957, 'batch_loss/train': 0.7763643469661474} +12/29/2021 03:29:21 - INFO - codeparrot_training - Step 46958: {'lr': 4.584602630751e-06, 'samples': 24043008, 'steps': 46958, 'batch_loss/train': 0.7108146185055375} +12/29/2021 03:29:32 - INFO - codeparrot_training - Step 46959: {'lr': 4.581598162261558e-06, 'samples': 24043520, 'steps': 46959, 'batch_loss/train': 0.5895618931390345} +12/29/2021 03:29:44 - INFO - codeparrot_training - Step 46960: {'lr': 4.578594669463032e-06, 'samples': 24044032, 'steps': 46960, 'batch_loss/train': 0.5492544794105925} +12/29/2021 03:29:54 - INFO - codeparrot_training - Step 46961: {'lr': 4.5755921523673304e-06, 'samples': 24044544, 'steps': 46961, 'batch_loss/train': 0.8672537235543132} +12/29/2021 03:30:05 - INFO - codeparrot_training - Step 46962: {'lr': 4.572590610986388e-06, 'samples': 24045056, 'steps': 46962, 'batch_loss/train': 0.7667274628765881} +12/29/2021 03:30:16 - INFO - codeparrot_training - Step 46963: {'lr': 4.569590045332167e-06, 'samples': 24045568, 'steps': 46963, 'batch_loss/train': 0.6915867399657145} +12/29/2021 03:30:28 - INFO - codeparrot_training - Step 46964: {'lr': 4.56659045541663e-06, 'samples': 24046080, 'steps': 46964, 'batch_loss/train': 0.7714840031694621} +12/29/2021 03:30:39 - INFO - codeparrot_training - Step 46965: {'lr': 4.563591841251574e-06, 'samples': 24046592, 'steps': 46965, 'batch_loss/train': 0.6261070067994297} +12/29/2021 03:30:50 - INFO - codeparrot_training - Step 46966: {'lr': 4.560594202849072e-06, 'samples': 24047104, 'steps': 46966, 'batch_loss/train': 0.7405787231400609} +12/29/2021 03:31:02 - INFO - codeparrot_training - Step 46967: {'lr': 4.557597540220948e-06, 'samples': 24047616, 'steps': 46967, 'batch_loss/train': 0.7602063240483403} +12/29/2021 03:31:12 - INFO - codeparrot_training - Step 46968: {'lr': 4.5546018533791364e-06, 'samples': 24048128, 'steps': 46968, 'batch_loss/train': 0.5827896525152028} +12/29/2021 03:31:23 - INFO - codeparrot_training - Step 46969: {'lr': 4.551607142335573e-06, 'samples': 24048640, 'steps': 46969, 'batch_loss/train': 0.7310543488711119} +12/29/2021 03:31:36 - INFO - codeparrot_training - Step 46970: {'lr': 4.548613407102109e-06, 'samples': 24049152, 'steps': 46970, 'batch_loss/train': 0.7373761786147952} +12/29/2021 03:31:46 - INFO - codeparrot_training - Step 46971: {'lr': 4.545620647690707e-06, 'samples': 24049664, 'steps': 46971, 'batch_loss/train': 0.7014385531656444} +12/29/2021 03:31:57 - INFO - codeparrot_training - Step 46972: {'lr': 4.542628864113219e-06, 'samples': 24050176, 'steps': 46972, 'batch_loss/train': 1.0236745541915298} +12/29/2021 03:32:08 - INFO - codeparrot_training - Step 46973: {'lr': 4.539638056381551e-06, 'samples': 24050688, 'steps': 46973, 'batch_loss/train': 0.6611023084260523} +12/29/2021 03:32:20 - INFO - codeparrot_training - Step 46974: {'lr': 4.536648224507611e-06, 'samples': 24051200, 'steps': 46974, 'batch_loss/train': 0.5722799907671288} +12/29/2021 03:32:30 - INFO - codeparrot_training - Step 46975: {'lr': 4.5336593685033064e-06, 'samples': 24051712, 'steps': 46975, 'batch_loss/train': 0.7007420370355248} +12/29/2021 03:32:41 - INFO - codeparrot_training - Step 46976: {'lr': 4.530671488380433e-06, 'samples': 24052224, 'steps': 46976, 'batch_loss/train': 0.9203477697446942} +12/29/2021 03:32:53 - INFO - codeparrot_training - Step 46977: {'lr': 4.527684584150954e-06, 'samples': 24052736, 'steps': 46977, 'batch_loss/train': 0.732332234736532} +12/29/2021 03:33:04 - INFO - codeparrot_training - Step 46978: {'lr': 4.52469865582672e-06, 'samples': 24053248, 'steps': 46978, 'batch_loss/train': 0.7057340820319951} +12/29/2021 03:33:14 - INFO - codeparrot_training - Step 46979: {'lr': 4.521713703419583e-06, 'samples': 24053760, 'steps': 46979, 'batch_loss/train': 0.6616790974512696} +12/29/2021 03:33:27 - INFO - codeparrot_training - Step 46980: {'lr': 4.518729726941423e-06, 'samples': 24054272, 'steps': 46980, 'batch_loss/train': 0.6755416780943051} +12/29/2021 03:33:37 - INFO - codeparrot_training - Step 46981: {'lr': 4.515746726404119e-06, 'samples': 24054784, 'steps': 46981, 'batch_loss/train': 0.7809590417891741} +12/29/2021 03:33:48 - INFO - codeparrot_training - Step 46982: {'lr': 4.512764701819494e-06, 'samples': 24055296, 'steps': 46982, 'batch_loss/train': 0.7084564450196922} +12/29/2021 03:33:59 - INFO - codeparrot_training - Step 46983: {'lr': 4.5097836531994e-06, 'samples': 24055808, 'steps': 46983, 'batch_loss/train': 0.6811120097991079} +12/29/2021 03:34:13 - INFO - codeparrot_training - Step 46984: {'lr': 4.5068035805557735e-06, 'samples': 24056320, 'steps': 46984, 'batch_loss/train': 0.6729829241521657} +12/29/2021 03:34:23 - INFO - codeparrot_training - Step 46985: {'lr': 4.503824483900382e-06, 'samples': 24056832, 'steps': 46985, 'batch_loss/train': 0.6274387962184846} +12/29/2021 03:34:34 - INFO - codeparrot_training - Step 46986: {'lr': 4.500846363245076e-06, 'samples': 24057344, 'steps': 46986, 'batch_loss/train': 0.6270579337142408} +12/29/2021 03:34:46 - INFO - codeparrot_training - Step 46987: {'lr': 4.497869218601708e-06, 'samples': 24057856, 'steps': 46987, 'batch_loss/train': 0.7714504506438971} +12/29/2021 03:34:57 - INFO - codeparrot_training - Step 46988: {'lr': 4.494893049982101e-06, 'samples': 24058368, 'steps': 46988, 'batch_loss/train': 0.812314978800714} +12/29/2021 03:35:07 - INFO - codeparrot_training - Step 46989: {'lr': 4.491917857398137e-06, 'samples': 24058880, 'steps': 46989, 'batch_loss/train': 0.6951979291625321} +12/29/2021 03:35:21 - INFO - codeparrot_training - Step 46990: {'lr': 4.488943640861581e-06, 'samples': 24059392, 'steps': 46990, 'batch_loss/train': 0.6367197320796549} +12/29/2021 03:35:32 - INFO - codeparrot_training - Step 46991: {'lr': 4.485970400384287e-06, 'samples': 24059904, 'steps': 46991, 'batch_loss/train': 0.6926439446397126} +12/29/2021 03:35:43 - INFO - codeparrot_training - Step 46992: {'lr': 4.482998135978078e-06, 'samples': 24060416, 'steps': 46992, 'batch_loss/train': 0.5322978736367077} +12/29/2021 03:35:53 - INFO - codeparrot_training - Step 46993: {'lr': 4.480026847654778e-06, 'samples': 24060928, 'steps': 46993, 'batch_loss/train': 0.7072392527479678} +12/29/2021 03:36:06 - INFO - codeparrot_training - Step 46994: {'lr': 4.477056535426127e-06, 'samples': 24061440, 'steps': 46994, 'batch_loss/train': 0.7110213842242956} +12/29/2021 03:36:17 - INFO - codeparrot_training - Step 46995: {'lr': 4.474087199304061e-06, 'samples': 24061952, 'steps': 46995, 'batch_loss/train': 0.740003221668303} +12/29/2021 03:36:27 - INFO - codeparrot_training - Step 46996: {'lr': 4.471118839300292e-06, 'samples': 24062464, 'steps': 46996, 'batch_loss/train': 0.4933004272170365} +12/29/2021 03:36:39 - INFO - codeparrot_training - Step 46997: {'lr': 4.468151455426644e-06, 'samples': 24062976, 'steps': 46997, 'batch_loss/train': 0.47957419918384403} +12/29/2021 03:36:50 - INFO - codeparrot_training - Step 46998: {'lr': 4.465185047694914e-06, 'samples': 24063488, 'steps': 46998, 'batch_loss/train': 0.5805241279304028} +12/29/2021 03:37:01 - INFO - codeparrot_training - Step 46999: {'lr': 4.462219616116925e-06, 'samples': 24064000, 'steps': 46999, 'batch_loss/train': 0.7415673154173419} +12/29/2021 03:37:14 - INFO - codeparrot_training - Step 47000: {'lr': 4.459255160704418e-06, 'samples': 24064512, 'steps': 47000, 'batch_loss/train': 0.6985687459819019} +12/29/2021 03:37:25 - INFO - codeparrot_training - Step 47001: {'lr': 4.456291681469215e-06, 'samples': 24065024, 'steps': 47001, 'batch_loss/train': 0.6911696721799672} +12/29/2021 03:37:36 - INFO - codeparrot_training - Step 47002: {'lr': 4.453329178423032e-06, 'samples': 24065536, 'steps': 47002, 'batch_loss/train': 0.6364036403829232} +12/29/2021 03:37:48 - INFO - codeparrot_training - Step 47003: {'lr': 4.450367651577747e-06, 'samples': 24066048, 'steps': 47003, 'batch_loss/train': 0.6683314276160672} +12/29/2021 03:37:58 - INFO - codeparrot_training - Step 47004: {'lr': 4.447407100945073e-06, 'samples': 24066560, 'steps': 47004, 'batch_loss/train': 0.6818875130265951} +12/29/2021 03:38:09 - INFO - codeparrot_training - Step 47005: {'lr': 4.444447526536805e-06, 'samples': 24067072, 'steps': 47005, 'batch_loss/train': 0.6997524921316653} +12/29/2021 03:38:20 - INFO - codeparrot_training - Step 47006: {'lr': 4.441488928364656e-06, 'samples': 24067584, 'steps': 47006, 'batch_loss/train': 0.814353596419096} +12/29/2021 03:38:34 - INFO - codeparrot_training - Step 47007: {'lr': 4.438531306440452e-06, 'samples': 24068096, 'steps': 47007, 'batch_loss/train': 0.6887784665450454} +12/29/2021 03:38:44 - INFO - codeparrot_training - Step 47008: {'lr': 4.435574660775932e-06, 'samples': 24068608, 'steps': 47008, 'batch_loss/train': 0.8069716272875667} +12/29/2021 03:38:55 - INFO - codeparrot_training - Step 47009: {'lr': 4.432618991382809e-06, 'samples': 24069120, 'steps': 47009, 'batch_loss/train': 0.676273787394166} +12/29/2021 03:39:07 - INFO - codeparrot_training - Step 47010: {'lr': 4.429664298272934e-06, 'samples': 24069632, 'steps': 47010, 'batch_loss/train': 0.6415237910114229} +12/29/2021 03:39:18 - INFO - codeparrot_training - Step 47011: {'lr': 4.426710581457938e-06, 'samples': 24070144, 'steps': 47011, 'batch_loss/train': 0.7328588063828647} +12/29/2021 03:39:28 - INFO - codeparrot_training - Step 47012: {'lr': 4.423757840949644e-06, 'samples': 24070656, 'steps': 47012, 'batch_loss/train': 0.6869487725198269} +12/29/2021 03:39:40 - INFO - codeparrot_training - Step 47013: {'lr': 4.4208060767597645e-06, 'samples': 24071168, 'steps': 47013, 'batch_loss/train': 0.7452283641323447} +12/29/2021 03:39:51 - INFO - codeparrot_training - Step 47014: {'lr': 4.417855288900013e-06, 'samples': 24071680, 'steps': 47014, 'batch_loss/train': 0.6670300173573196} +12/29/2021 03:40:02 - INFO - codeparrot_training - Step 47015: {'lr': 4.414905477382131e-06, 'samples': 24072192, 'steps': 47015, 'batch_loss/train': 0.7910233444999903} +12/29/2021 03:40:12 - INFO - codeparrot_training - Step 47016: {'lr': 4.411956642217912e-06, 'samples': 24072704, 'steps': 47016, 'batch_loss/train': 0.7029074807651341} +12/29/2021 03:40:26 - INFO - codeparrot_training - Step 47017: {'lr': 4.409008783418961e-06, 'samples': 24073216, 'steps': 47017, 'batch_loss/train': 0.7185123804956675} +12/29/2021 03:40:37 - INFO - codeparrot_training - Step 47018: {'lr': 4.406061900997072e-06, 'samples': 24073728, 'steps': 47018, 'batch_loss/train': 0.6671990351751447} +12/29/2021 03:40:48 - INFO - codeparrot_training - Step 47019: {'lr': 4.403115994963986e-06, 'samples': 24074240, 'steps': 47019, 'batch_loss/train': 0.6532104192301631} +12/29/2021 03:41:00 - INFO - codeparrot_training - Step 47020: {'lr': 4.400171065331332e-06, 'samples': 24074752, 'steps': 47020, 'batch_loss/train': 0.652703178639058} +12/29/2021 03:41:10 - INFO - codeparrot_training - Step 47021: {'lr': 4.39722711211088e-06, 'samples': 24075264, 'steps': 47021, 'batch_loss/train': 0.6773668886162341} +12/29/2021 03:41:21 - INFO - codeparrot_training - Step 47022: {'lr': 4.394284135314342e-06, 'samples': 24075776, 'steps': 47022, 'batch_loss/train': 0.7002330070827156} +12/29/2021 03:41:33 - INFO - codeparrot_training - Step 47023: {'lr': 4.391342134953347e-06, 'samples': 24076288, 'steps': 47023, 'batch_loss/train': 0.705099037848413} +12/29/2021 03:41:44 - INFO - codeparrot_training - Step 47024: {'lr': 4.388401111039664e-06, 'samples': 24076800, 'steps': 47024, 'batch_loss/train': 0.6664010393433273} +12/29/2021 03:41:54 - INFO - codeparrot_training - Step 47025: {'lr': 4.3854610635849505e-06, 'samples': 24077312, 'steps': 47025, 'batch_loss/train': 0.7327969213947654} +12/29/2021 03:42:06 - INFO - codeparrot_training - Step 47026: {'lr': 4.382521992600918e-06, 'samples': 24077824, 'steps': 47026, 'batch_loss/train': 0.654117812635377} +12/29/2021 03:42:17 - INFO - codeparrot_training - Step 47027: {'lr': 4.379583898099227e-06, 'samples': 24078336, 'steps': 47027, 'batch_loss/train': 0.5623639028635807} +12/29/2021 03:42:28 - INFO - codeparrot_training - Step 47028: {'lr': 4.376646780091559e-06, 'samples': 24078848, 'steps': 47028, 'batch_loss/train': 0.5694429511204362} +12/29/2021 03:42:38 - INFO - codeparrot_training - Step 47029: {'lr': 4.373710638589601e-06, 'samples': 24079360, 'steps': 47029, 'batch_loss/train': 0.6439855601638556} +12/29/2021 03:42:52 - INFO - codeparrot_training - Step 47030: {'lr': 4.370775473605065e-06, 'samples': 24079872, 'steps': 47030, 'batch_loss/train': 0.7771761454641819} +12/29/2021 03:43:03 - INFO - codeparrot_training - Step 47031: {'lr': 4.367841285149526e-06, 'samples': 24080384, 'steps': 47031, 'batch_loss/train': 0.7720392236951739} +12/29/2021 03:43:13 - INFO - codeparrot_training - Step 47032: {'lr': 4.364908073234697e-06, 'samples': 24080896, 'steps': 47032, 'batch_loss/train': 0.6581936464644969} +12/29/2021 03:43:26 - INFO - codeparrot_training - Step 47033: {'lr': 4.361975837872289e-06, 'samples': 24081408, 'steps': 47033, 'batch_loss/train': 0.9287243648432195} +12/29/2021 03:43:36 - INFO - codeparrot_training - Step 47034: {'lr': 4.359044579073906e-06, 'samples': 24081920, 'steps': 47034, 'batch_loss/train': 0.7144563104957342} +12/29/2021 03:43:47 - INFO - codeparrot_training - Step 47035: {'lr': 4.3561142968511765e-06, 'samples': 24082432, 'steps': 47035, 'batch_loss/train': 0.6954942094162107} +12/29/2021 03:44:01 - INFO - codeparrot_training - Step 47036: {'lr': 4.353184991215841e-06, 'samples': 24082944, 'steps': 47036, 'batch_loss/train': 0.6870319857262075} +12/29/2021 03:44:11 - INFO - codeparrot_training - Step 47037: {'lr': 4.350256662179447e-06, 'samples': 24083456, 'steps': 47037, 'batch_loss/train': 0.7186131924390793} +12/29/2021 03:44:22 - INFO - codeparrot_training - Step 47038: {'lr': 4.347329309753706e-06, 'samples': 24083968, 'steps': 47038, 'batch_loss/train': 0.7190938699059188} +12/29/2021 03:44:33 - INFO - codeparrot_training - Step 47039: {'lr': 4.34440293395022e-06, 'samples': 24084480, 'steps': 47039, 'batch_loss/train': 0.789157644379884} +12/29/2021 03:44:45 - INFO - codeparrot_training - Step 47040: {'lr': 4.341477534780619e-06, 'samples': 24084992, 'steps': 47040, 'batch_loss/train': 0.7297925725579262} +12/29/2021 03:44:56 - INFO - codeparrot_training - Step 47041: {'lr': 4.338553112256533e-06, 'samples': 24085504, 'steps': 47041, 'batch_loss/train': 0.33331103666569106} +12/29/2021 03:45:06 - INFO - codeparrot_training - Step 47042: {'lr': 4.335629666389646e-06, 'samples': 24086016, 'steps': 47042, 'batch_loss/train': 0.6388934537535533} +12/29/2021 03:45:19 - INFO - codeparrot_training - Step 47043: {'lr': 4.332707197191505e-06, 'samples': 24086528, 'steps': 47043, 'batch_loss/train': 0.6825982625596225} +12/29/2021 03:45:29 - INFO - codeparrot_training - Step 47044: {'lr': 4.329785704673767e-06, 'samples': 24087040, 'steps': 47044, 'batch_loss/train': 0.6970622779335827} +12/29/2021 03:45:40 - INFO - codeparrot_training - Step 47045: {'lr': 4.326865188848062e-06, 'samples': 24087552, 'steps': 47045, 'batch_loss/train': 0.5135391642106697} +12/29/2021 03:45:54 - INFO - codeparrot_training - Step 47046: {'lr': 4.323945649725935e-06, 'samples': 24088064, 'steps': 47046, 'batch_loss/train': 0.7133845770731568} +12/29/2021 03:46:04 - INFO - codeparrot_training - Step 47047: {'lr': 4.321027087319073e-06, 'samples': 24088576, 'steps': 47047, 'batch_loss/train': 0.7290943232364953} +12/29/2021 03:46:15 - INFO - codeparrot_training - Step 47048: {'lr': 4.318109501639023e-06, 'samples': 24089088, 'steps': 47048, 'batch_loss/train': 0.7091836594045162} +12/29/2021 03:46:27 - INFO - codeparrot_training - Step 47049: {'lr': 4.315192892697411e-06, 'samples': 24089600, 'steps': 47049, 'batch_loss/train': 0.6863123932853341} +12/29/2021 03:46:38 - INFO - codeparrot_training - Step 47050: {'lr': 4.3122772605058145e-06, 'samples': 24090112, 'steps': 47050, 'batch_loss/train': 0.7516122348606586} +12/29/2021 03:46:49 - INFO - codeparrot_training - Step 47051: {'lr': 4.3093626050758336e-06, 'samples': 24090624, 'steps': 47051, 'batch_loss/train': 0.6257529663853347} +12/29/2021 03:46:59 - INFO - codeparrot_training - Step 47052: {'lr': 4.306448926419071e-06, 'samples': 24091136, 'steps': 47052, 'batch_loss/train': 0.7238174725789577} +12/29/2021 03:47:12 - INFO - codeparrot_training - Step 47053: {'lr': 4.3035362245471e-06, 'samples': 24091648, 'steps': 47053, 'batch_loss/train': 0.7787390621379018} +12/29/2021 03:47:22 - INFO - codeparrot_training - Step 47054: {'lr': 4.300624499471495e-06, 'samples': 24092160, 'steps': 47054, 'batch_loss/train': 0.6690286248922348} +12/29/2021 03:47:33 - INFO - codeparrot_training - Step 47055: {'lr': 4.29771375120383e-06, 'samples': 24092672, 'steps': 47055, 'batch_loss/train': 0.7141740305814892} +12/29/2021 03:47:45 - INFO - codeparrot_training - Step 47056: {'lr': 4.29480397975568e-06, 'samples': 24093184, 'steps': 47056, 'batch_loss/train': 0.6474840235896409} +12/29/2021 03:47:56 - INFO - codeparrot_training - Step 47057: {'lr': 4.291895185138617e-06, 'samples': 24093696, 'steps': 47057, 'batch_loss/train': 0.6953460448421538} +12/29/2021 03:48:06 - INFO - codeparrot_training - Step 47058: {'lr': 4.288987367364189e-06, 'samples': 24094208, 'steps': 47058, 'batch_loss/train': 0.6817285167053342} +12/29/2021 03:48:20 - INFO - codeparrot_training - Step 47059: {'lr': 4.286080526443997e-06, 'samples': 24094720, 'steps': 47059, 'batch_loss/train': 0.6904769740067422} +12/29/2021 03:48:31 - INFO - codeparrot_training - Step 47060: {'lr': 4.28317466238956e-06, 'samples': 24095232, 'steps': 47060, 'batch_loss/train': 0.7156988633796573} +12/29/2021 03:48:42 - INFO - codeparrot_training - Step 47061: {'lr': 4.280269775212425e-06, 'samples': 24095744, 'steps': 47061, 'batch_loss/train': 0.6196220135316253} +12/29/2021 03:48:52 - INFO - codeparrot_training - Step 47062: {'lr': 4.277365864924193e-06, 'samples': 24096256, 'steps': 47062, 'batch_loss/train': 0.6561126867309213} +12/29/2021 03:49:04 - INFO - codeparrot_training - Step 47063: {'lr': 4.274462931536355e-06, 'samples': 24096768, 'steps': 47063, 'batch_loss/train': 0.6525152190588415} +12/29/2021 03:49:15 - INFO - codeparrot_training - Step 47064: {'lr': 4.2715609750604564e-06, 'samples': 24097280, 'steps': 47064, 'batch_loss/train': 0.6199501128867269} +12/29/2021 03:49:26 - INFO - codeparrot_training - Step 47065: {'lr': 4.268659995508045e-06, 'samples': 24097792, 'steps': 47065, 'batch_loss/train': 0.853235880844295} +12/29/2021 03:49:38 - INFO - codeparrot_training - Step 47066: {'lr': 4.2657599928906946e-06, 'samples': 24098304, 'steps': 47066, 'batch_loss/train': 0.63114316604333} +12/29/2021 03:49:48 - INFO - codeparrot_training - Step 47067: {'lr': 4.262860967219867e-06, 'samples': 24098816, 'steps': 47067, 'batch_loss/train': 0.7357070073485374} +12/29/2021 03:49:59 - INFO - codeparrot_training - Step 47068: {'lr': 4.2599629185071096e-06, 'samples': 24099328, 'steps': 47068, 'batch_loss/train': 0.7003312958404422} +12/29/2021 03:50:13 - INFO - codeparrot_training - Step 47069: {'lr': 4.257065846763969e-06, 'samples': 24099840, 'steps': 47069, 'batch_loss/train': 0.4217334470595233} +12/29/2021 03:50:24 - INFO - codeparrot_training - Step 47070: {'lr': 4.254169752001935e-06, 'samples': 24100352, 'steps': 47070, 'batch_loss/train': 0.7180052832700312} +12/29/2021 03:50:34 - INFO - codeparrot_training - Step 47071: {'lr': 4.251274634232555e-06, 'samples': 24100864, 'steps': 47071, 'batch_loss/train': 0.7111645918339491} +12/29/2021 03:50:45 - INFO - codeparrot_training - Step 47072: {'lr': 4.248380493467291e-06, 'samples': 24101376, 'steps': 47072, 'batch_loss/train': 0.6550576272420585} +12/29/2021 03:50:57 - INFO - codeparrot_training - Step 47073: {'lr': 4.2454873297176626e-06, 'samples': 24101888, 'steps': 47073, 'batch_loss/train': 0.684830428857822} +12/29/2021 03:51:08 - INFO - codeparrot_training - Step 47074: {'lr': 4.242595142995243e-06, 'samples': 24102400, 'steps': 47074, 'batch_loss/train': 0.7204367009690031} +12/29/2021 03:51:18 - INFO - codeparrot_training - Step 47075: {'lr': 4.239703933311412e-06, 'samples': 24102912, 'steps': 47075, 'batch_loss/train': 0.6278671049512923} +12/29/2021 03:51:32 - INFO - codeparrot_training - Step 47076: {'lr': 4.236813700677744e-06, 'samples': 24103424, 'steps': 47076, 'batch_loss/train': 0.6813830161700025} +12/29/2021 03:51:43 - INFO - codeparrot_training - Step 47077: {'lr': 4.233924445105703e-06, 'samples': 24103936, 'steps': 47077, 'batch_loss/train': 0.5562366964295506} +12/29/2021 03:51:54 - INFO - codeparrot_training - Step 47078: {'lr': 4.231036166606778e-06, 'samples': 24104448, 'steps': 47078, 'batch_loss/train': 0.6050170632079244} +12/29/2021 03:52:06 - INFO - codeparrot_training - Step 47079: {'lr': 4.22814886519246e-06, 'samples': 24104960, 'steps': 47079, 'batch_loss/train': 0.8252790961414576} +12/29/2021 03:52:17 - INFO - codeparrot_training - Step 47080: {'lr': 4.225262540874241e-06, 'samples': 24105472, 'steps': 47080, 'batch_loss/train': 0.7077500422019511} +12/29/2021 03:52:27 - INFO - codeparrot_training - Step 47081: {'lr': 4.2223771936635565e-06, 'samples': 24105984, 'steps': 47081, 'batch_loss/train': 0.5619826302863657} +12/29/2021 03:52:39 - INFO - codeparrot_training - Step 47082: {'lr': 4.219492823571896e-06, 'samples': 24106496, 'steps': 47082, 'batch_loss/train': 0.7627954710042104} +12/29/2021 03:52:50 - INFO - codeparrot_training - Step 47083: {'lr': 4.2166094306107236e-06, 'samples': 24107008, 'steps': 47083, 'batch_loss/train': 0.6094183581881225} +12/29/2021 03:53:00 - INFO - codeparrot_training - Step 47084: {'lr': 4.213727014791529e-06, 'samples': 24107520, 'steps': 47084, 'batch_loss/train': 0.7423912296071649} +12/29/2021 03:53:11 - INFO - codeparrot_training - Step 47085: {'lr': 4.210845576125749e-06, 'samples': 24108032, 'steps': 47085, 'batch_loss/train': 0.5866995882533956} +12/29/2021 03:53:25 - INFO - codeparrot_training - Step 47086: {'lr': 4.207965114624818e-06, 'samples': 24108544, 'steps': 47086, 'batch_loss/train': 0.6772344553319272} +12/29/2021 03:53:36 - INFO - codeparrot_training - Step 47087: {'lr': 4.205085630300226e-06, 'samples': 24109056, 'steps': 47087, 'batch_loss/train': 0.6258214358240366} +12/29/2021 03:53:46 - INFO - codeparrot_training - Step 47088: {'lr': 4.2022071231634095e-06, 'samples': 24109568, 'steps': 47088, 'batch_loss/train': 0.6811493635177612} +12/29/2021 03:53:58 - INFO - codeparrot_training - Step 47089: {'lr': 4.199329593225804e-06, 'samples': 24110080, 'steps': 47089, 'batch_loss/train': 0.6455626896349713} +12/29/2021 03:54:09 - INFO - codeparrot_training - Step 47090: {'lr': 4.196453040498843e-06, 'samples': 24110592, 'steps': 47090, 'batch_loss/train': 0.6272309699561447} +12/29/2021 03:54:20 - INFO - codeparrot_training - Step 47091: {'lr': 4.193577464993964e-06, 'samples': 24111104, 'steps': 47091, 'batch_loss/train': 0.7425986309535801} +12/29/2021 03:54:32 - INFO - codeparrot_training - Step 47092: {'lr': 4.190702866722629e-06, 'samples': 24111616, 'steps': 47092, 'batch_loss/train': 0.7392220366746187} +12/29/2021 03:54:42 - INFO - codeparrot_training - Step 47093: {'lr': 4.187829245696245e-06, 'samples': 24112128, 'steps': 47093, 'batch_loss/train': 0.675106986425817} +12/29/2021 03:54:53 - INFO - codeparrot_training - Step 47094: {'lr': 4.1849566019262205e-06, 'samples': 24112640, 'steps': 47094, 'batch_loss/train': 0.6958549956325442} +12/29/2021 03:55:05 - INFO - codeparrot_training - Step 47095: {'lr': 4.182084935424019e-06, 'samples': 24113152, 'steps': 47095, 'batch_loss/train': 0.7099389238283038} +12/29/2021 03:55:16 - INFO - codeparrot_training - Step 47096: {'lr': 4.179214246201019e-06, 'samples': 24113664, 'steps': 47096, 'batch_loss/train': 0.6139524129685014} +12/29/2021 03:55:26 - INFO - codeparrot_training - Step 47097: {'lr': 4.176344534268656e-06, 'samples': 24114176, 'steps': 47097, 'batch_loss/train': 0.6807351768948138} +12/29/2021 03:55:37 - INFO - codeparrot_training - Step 47098: {'lr': 4.1734757996382824e-06, 'samples': 24114688, 'steps': 47098, 'batch_loss/train': 0.6247204847168177} +12/29/2021 03:55:51 - INFO - codeparrot_training - Step 47099: {'lr': 4.1706080423213885e-06, 'samples': 24115200, 'steps': 47099, 'batch_loss/train': 0.7192319049499929} +12/29/2021 03:56:02 - INFO - codeparrot_training - Step 47100: {'lr': 4.167741262329328e-06, 'samples': 24115712, 'steps': 47100, 'batch_loss/train': 0.8610155978240073} +12/29/2021 03:56:12 - INFO - codeparrot_training - Step 47101: {'lr': 4.164875459673506e-06, 'samples': 24116224, 'steps': 47101, 'batch_loss/train': 0.5579450849909335} +12/29/2021 03:56:25 - INFO - codeparrot_training - Step 47102: {'lr': 4.1620106343653045e-06, 'samples': 24116736, 'steps': 47102, 'batch_loss/train': 0.7072551655583084} +12/29/2021 03:56:35 - INFO - codeparrot_training - Step 47103: {'lr': 4.159146786416129e-06, 'samples': 24117248, 'steps': 47103, 'batch_loss/train': 0.6131587554409634} +12/29/2021 03:56:46 - INFO - codeparrot_training - Step 47104: {'lr': 4.156283915837361e-06, 'samples': 24117760, 'steps': 47104, 'batch_loss/train': 0.4604004848806653} +12/29/2021 03:57:00 - INFO - codeparrot_training - Step 47105: {'lr': 4.153422022640407e-06, 'samples': 24118272, 'steps': 47105, 'batch_loss/train': 0.7485960652120411} +12/29/2021 03:57:11 - INFO - codeparrot_training - Step 47106: {'lr': 4.150561106836592e-06, 'samples': 24118784, 'steps': 47106, 'batch_loss/train': 0.7256812689010985} +12/29/2021 03:57:22 - INFO - codeparrot_training - Step 47107: {'lr': 4.147701168437295e-06, 'samples': 24119296, 'steps': 47107, 'batch_loss/train': 0.6176111600943841} +12/29/2021 03:57:32 - INFO - codeparrot_training - Step 47108: {'lr': 4.144842207453953e-06, 'samples': 24119808, 'steps': 47108, 'batch_loss/train': 0.7314280662685633} +12/29/2021 03:57:44 - INFO - codeparrot_training - Step 47109: {'lr': 4.14198422389786e-06, 'samples': 24120320, 'steps': 47109, 'batch_loss/train': 1.0877408916130662} +12/29/2021 03:57:55 - INFO - codeparrot_training - Step 47110: {'lr': 4.1391272177803975e-06, 'samples': 24120832, 'steps': 47110, 'batch_loss/train': 0.5843777291011065} +12/29/2021 03:58:06 - INFO - codeparrot_training - Step 47111: {'lr': 4.136271189112972e-06, 'samples': 24121344, 'steps': 47111, 'batch_loss/train': 0.6234440100379288} +12/29/2021 03:58:18 - INFO - codeparrot_training - Step 47112: {'lr': 4.133416137906881e-06, 'samples': 24121856, 'steps': 47112, 'batch_loss/train': 0.7620376674458385} +12/29/2021 03:58:28 - INFO - codeparrot_training - Step 47113: {'lr': 4.130562064173449e-06, 'samples': 24122368, 'steps': 47113, 'batch_loss/train': 0.7799432084430009} +12/29/2021 03:58:39 - INFO - codeparrot_training - Step 47114: {'lr': 4.127708967924138e-06, 'samples': 24122880, 'steps': 47114, 'batch_loss/train': 0.6687976662069559} +12/29/2021 03:58:50 - INFO - codeparrot_training - Step 47115: {'lr': 4.124856849170161e-06, 'samples': 24123392, 'steps': 47115, 'batch_loss/train': 0.6991650925483555} +12/29/2021 03:59:04 - INFO - codeparrot_training - Step 47116: {'lr': 4.122005707922926e-06, 'samples': 24123904, 'steps': 47116, 'batch_loss/train': 0.5886185695417225} +12/29/2021 03:59:14 - INFO - codeparrot_training - Step 47117: {'lr': 4.119155544193787e-06, 'samples': 24124416, 'steps': 47117, 'batch_loss/train': 0.5736717651598155} +12/29/2021 03:59:25 - INFO - codeparrot_training - Step 47118: {'lr': 4.11630635799401e-06, 'samples': 24124928, 'steps': 47118, 'batch_loss/train': 0.6790058552287519} +12/29/2021 03:59:37 - INFO - codeparrot_training - Step 47119: {'lr': 4.1134581493349755e-06, 'samples': 24125440, 'steps': 47119, 'batch_loss/train': 0.7113430555909872} +12/29/2021 03:59:48 - INFO - codeparrot_training - Step 47120: {'lr': 4.110610918227981e-06, 'samples': 24125952, 'steps': 47120, 'batch_loss/train': 0.7409545611590147} +12/29/2021 03:59:58 - INFO - codeparrot_training - Step 47121: {'lr': 4.107764664684349e-06, 'samples': 24126464, 'steps': 47121, 'batch_loss/train': 0.7517308602109551} +12/29/2021 04:00:10 - INFO - codeparrot_training - Step 47122: {'lr': 4.104919388715406e-06, 'samples': 24126976, 'steps': 47122, 'batch_loss/train': 0.7531667547300458} +12/29/2021 04:00:21 - INFO - codeparrot_training - Step 47123: {'lr': 4.102075090332474e-06, 'samples': 24127488, 'steps': 47123, 'batch_loss/train': 0.7258398644626141} +12/29/2021 04:00:32 - INFO - codeparrot_training - Step 47124: {'lr': 4.099231769546796e-06, 'samples': 24128000, 'steps': 47124, 'batch_loss/train': 0.6859722873196006} +12/29/2021 04:00:42 - INFO - codeparrot_training - Step 47125: {'lr': 4.096389426369751e-06, 'samples': 24128512, 'steps': 47125, 'batch_loss/train': 0.779086618218571} +12/29/2021 04:00:54 - INFO - codeparrot_training - Step 47126: {'lr': 4.093548060812635e-06, 'samples': 24129024, 'steps': 47126, 'batch_loss/train': 0.678695797920227} +12/29/2021 04:01:05 - INFO - codeparrot_training - Step 47127: {'lr': 4.09070767288669e-06, 'samples': 24129536, 'steps': 47127, 'batch_loss/train': 0.6571711023279931} +12/29/2021 04:01:16 - INFO - codeparrot_training - Step 47128: {'lr': 4.08786826260324e-06, 'samples': 24130048, 'steps': 47128, 'batch_loss/train': 0.7469643047079444} +12/29/2021 04:01:29 - INFO - codeparrot_training - Step 47129: {'lr': 4.085029829973608e-06, 'samples': 24130560, 'steps': 47129, 'batch_loss/train': 0.7287276377901435} +12/29/2021 04:01:39 - INFO - codeparrot_training - Step 47130: {'lr': 4.082192375009009e-06, 'samples': 24131072, 'steps': 47130, 'batch_loss/train': 0.6814271840266883} +12/29/2021 04:01:50 - INFO - codeparrot_training - Step 47131: {'lr': 4.079355897720766e-06, 'samples': 24131584, 'steps': 47131, 'batch_loss/train': 0.6423871541628614} +12/29/2021 04:02:02 - INFO - codeparrot_training - Step 47132: {'lr': 4.076520398120149e-06, 'samples': 24132096, 'steps': 47132, 'batch_loss/train': 0.7361748018302023} +12/29/2021 04:02:13 - INFO - codeparrot_training - Step 47133: {'lr': 4.073685876218425e-06, 'samples': 24132608, 'steps': 47133, 'batch_loss/train': 0.5749157338868827} +12/29/2021 04:02:23 - INFO - codeparrot_training - Step 47134: {'lr': 4.070852332026864e-06, 'samples': 24133120, 'steps': 47134, 'batch_loss/train': 0.6460206578485668} +12/29/2021 04:02:36 - INFO - codeparrot_training - Step 47135: {'lr': 4.0680197655567356e-06, 'samples': 24133632, 'steps': 47135, 'batch_loss/train': 0.7068421457079239} +12/29/2021 04:02:47 - INFO - codeparrot_training - Step 47136: {'lr': 4.065188176819307e-06, 'samples': 24134144, 'steps': 47136, 'batch_loss/train': 0.6271029440686107} +12/29/2021 04:02:57 - INFO - codeparrot_training - Step 47137: {'lr': 4.062357565825847e-06, 'samples': 24134656, 'steps': 47137, 'batch_loss/train': 0.6559476270340383} +12/29/2021 04:03:08 - INFO - codeparrot_training - Step 47138: {'lr': 4.059527932587598e-06, 'samples': 24135168, 'steps': 47138, 'batch_loss/train': 0.6397389628691599} +12/29/2021 04:03:20 - INFO - codeparrot_training - Step 47139: {'lr': 4.056699277115744e-06, 'samples': 24135680, 'steps': 47139, 'batch_loss/train': 0.69045653619105} +12/29/2021 04:03:31 - INFO - codeparrot_training - Step 47140: {'lr': 4.053871599421638e-06, 'samples': 24136192, 'steps': 47140, 'batch_loss/train': 0.6034603648004122} +12/29/2021 04:03:41 - INFO - codeparrot_training - Step 47141: {'lr': 4.051044899516465e-06, 'samples': 24136704, 'steps': 47141, 'batch_loss/train': 0.6296619644854218} +12/29/2021 04:03:54 - INFO - codeparrot_training - Step 47142: {'lr': 4.048219177411439e-06, 'samples': 24137216, 'steps': 47142, 'batch_loss/train': 0.6958554275333881} +12/29/2021 04:04:04 - INFO - codeparrot_training - Step 47143: {'lr': 4.045394433117882e-06, 'samples': 24137728, 'steps': 47143, 'batch_loss/train': 0.7627163222059608} +12/29/2021 04:04:15 - INFO - codeparrot_training - Step 47144: {'lr': 4.0425706666469275e-06, 'samples': 24138240, 'steps': 47144, 'batch_loss/train': 0.5821665039984509} +12/29/2021 04:04:28 - INFO - codeparrot_training - Step 47145: {'lr': 4.0397478780098415e-06, 'samples': 24138752, 'steps': 47145, 'batch_loss/train': 0.7288061415310949} +12/29/2021 04:04:38 - INFO - codeparrot_training - Step 47146: {'lr': 4.036926067217866e-06, 'samples': 24139264, 'steps': 47146, 'batch_loss/train': 0.749957786872983} +12/29/2021 04:04:49 - INFO - codeparrot_training - Step 47147: {'lr': 4.0341052342821875e-06, 'samples': 24139776, 'steps': 47147, 'batch_loss/train': 0.7670976901426911} +12/29/2021 04:05:01 - INFO - codeparrot_training - Step 47148: {'lr': 4.031285379214045e-06, 'samples': 24140288, 'steps': 47148, 'batch_loss/train': 0.8000395749695599} +12/29/2021 04:05:12 - INFO - codeparrot_training - Step 47149: {'lr': 4.028466502024624e-06, 'samples': 24140800, 'steps': 47149, 'batch_loss/train': 0.8061518063768744} +12/29/2021 04:05:22 - INFO - codeparrot_training - Step 47150: {'lr': 4.025648602725141e-06, 'samples': 24141312, 'steps': 47150, 'batch_loss/train': 0.6361829284578562} +12/29/2021 04:05:33 - INFO - codeparrot_training - Step 47151: {'lr': 4.0228316813268055e-06, 'samples': 24141824, 'steps': 47151, 'batch_loss/train': 0.7089717881754041} +12/29/2021 04:05:46 - INFO - codeparrot_training - Step 47152: {'lr': 4.02001573784086e-06, 'samples': 24142336, 'steps': 47152, 'batch_loss/train': 0.7655357262119651} +12/29/2021 04:05:56 - INFO - codeparrot_training - Step 47153: {'lr': 4.017200772278406e-06, 'samples': 24142848, 'steps': 47153, 'batch_loss/train': 0.5586309708014596} +12/29/2021 04:06:07 - INFO - codeparrot_training - Step 47154: {'lr': 4.014386784650659e-06, 'samples': 24143360, 'steps': 47154, 'batch_loss/train': 0.6151571707159746} +12/29/2021 04:06:19 - INFO - codeparrot_training - Step 47155: {'lr': 4.011573774968885e-06, 'samples': 24143872, 'steps': 47155, 'batch_loss/train': 0.5777645230700728} +12/29/2021 04:06:30 - INFO - codeparrot_training - Step 47156: {'lr': 4.008761743244188e-06, 'samples': 24144384, 'steps': 47156, 'batch_loss/train': 0.6581726067815907} +12/29/2021 04:06:40 - INFO - codeparrot_training - Step 47157: {'lr': 4.0059506894877805e-06, 'samples': 24144896, 'steps': 47157, 'batch_loss/train': 0.7501810351386666} +12/29/2021 04:06:52 - INFO - codeparrot_training - Step 47158: {'lr': 4.00314061371082e-06, 'samples': 24145408, 'steps': 47158, 'batch_loss/train': 0.6569695714861155} +12/29/2021 04:07:03 - INFO - codeparrot_training - Step 47159: {'lr': 4.000331515924466e-06, 'samples': 24145920, 'steps': 47159, 'batch_loss/train': 0.6253840219287667} +12/29/2021 04:07:14 - INFO - codeparrot_training - Step 47160: {'lr': 3.997523396139929e-06, 'samples': 24146432, 'steps': 47160, 'batch_loss/train': 0.7146572545170784} +12/29/2021 04:07:24 - INFO - codeparrot_training - Step 47161: {'lr': 3.994716254368369e-06, 'samples': 24146944, 'steps': 47161, 'batch_loss/train': 0.7149890176951885} +12/29/2021 04:07:37 - INFO - codeparrot_training - Step 47162: {'lr': 3.991910090620915e-06, 'samples': 24147456, 'steps': 47162, 'batch_loss/train': 0.7829069432336837} +12/29/2021 04:07:47 - INFO - codeparrot_training - Step 47163: {'lr': 3.9891049049087245e-06, 'samples': 24147968, 'steps': 47163, 'batch_loss/train': 0.44720564933959395} +12/29/2021 04:07:58 - INFO - codeparrot_training - Step 47164: {'lr': 3.986300697242984e-06, 'samples': 24148480, 'steps': 47164, 'batch_loss/train': 0.7252958118915558} +12/29/2021 04:08:11 - INFO - codeparrot_training - Step 47165: {'lr': 3.983497467634795e-06, 'samples': 24148992, 'steps': 47165, 'batch_loss/train': 0.7007385930046439} +12/29/2021 04:08:22 - INFO - codeparrot_training - Step 47166: {'lr': 3.98069521609537e-06, 'samples': 24149504, 'steps': 47166, 'batch_loss/train': 0.5367925565806217} +12/29/2021 04:08:32 - INFO - codeparrot_training - Step 47167: {'lr': 3.977893942635785e-06, 'samples': 24150016, 'steps': 47167, 'batch_loss/train': 0.7038229294121265} +12/29/2021 04:08:44 - INFO - codeparrot_training - Step 47168: {'lr': 3.975093647267169e-06, 'samples': 24150528, 'steps': 47168, 'batch_loss/train': 0.6889733271673322} +12/29/2021 04:08:55 - INFO - codeparrot_training - Step 47169: {'lr': 3.9722943300007365e-06, 'samples': 24151040, 'steps': 47169, 'batch_loss/train': 0.7763061774894595} +12/29/2021 04:09:06 - INFO - codeparrot_training - Step 47170: {'lr': 3.969495990847533e-06, 'samples': 24151552, 'steps': 47170, 'batch_loss/train': 0.5884782264765818} +12/29/2021 04:09:16 - INFO - codeparrot_training - Step 47171: {'lr': 3.9666986298187154e-06, 'samples': 24152064, 'steps': 47171, 'batch_loss/train': 0.7044702200219035} +12/29/2021 04:09:29 - INFO - codeparrot_training - Step 47172: {'lr': 3.963902246925416e-06, 'samples': 24152576, 'steps': 47172, 'batch_loss/train': 0.7634251089766622} +12/29/2021 04:09:39 - INFO - codeparrot_training - Step 47173: {'lr': 3.961106842178736e-06, 'samples': 24153088, 'steps': 47173, 'batch_loss/train': 0.670781871303916} +12/29/2021 04:09:50 - INFO - codeparrot_training - Step 47174: {'lr': 3.958312415589804e-06, 'samples': 24153600, 'steps': 47174, 'batch_loss/train': 0.7621516045182943} +12/29/2021 04:10:03 - INFO - codeparrot_training - Step 47175: {'lr': 3.955518967169725e-06, 'samples': 24154112, 'steps': 47175, 'batch_loss/train': 0.6216469770297408} +12/29/2021 04:10:13 - INFO - codeparrot_training - Step 47176: {'lr': 3.952726496929571e-06, 'samples': 24154624, 'steps': 47176, 'batch_loss/train': 0.7255449220538139} +12/29/2021 04:10:24 - INFO - codeparrot_training - Step 47177: {'lr': 3.9499350048805005e-06, 'samples': 24155136, 'steps': 47177, 'batch_loss/train': 0.63723874615971} +12/29/2021 04:10:36 - INFO - codeparrot_training - Step 47178: {'lr': 3.947144491033589e-06, 'samples': 24155648, 'steps': 47178, 'batch_loss/train': 0.4140433834400028} +12/29/2021 04:10:47 - INFO - codeparrot_training - Step 47179: {'lr': 3.944354955399882e-06, 'samples': 24156160, 'steps': 47179, 'batch_loss/train': 0.7197074126452208} +12/29/2021 04:10:58 - INFO - codeparrot_training - Step 47180: {'lr': 3.941566397990537e-06, 'samples': 24156672, 'steps': 47180, 'batch_loss/train': 0.7077867463231087} +12/29/2021 04:11:08 - INFO - codeparrot_training - Step 47181: {'lr': 3.938778818816657e-06, 'samples': 24157184, 'steps': 47181, 'batch_loss/train': 0.5895962384529412} +12/29/2021 04:11:21 - INFO - codeparrot_training - Step 47182: {'lr': 3.935992217889234e-06, 'samples': 24157696, 'steps': 47182, 'batch_loss/train': 0.7891700994223356} +12/29/2021 04:11:32 - INFO - codeparrot_training - Step 47183: {'lr': 3.933206595219396e-06, 'samples': 24158208, 'steps': 47183, 'batch_loss/train': 0.7467448376119137} +12/29/2021 04:11:42 - INFO - codeparrot_training - Step 47184: {'lr': 3.930421950818247e-06, 'samples': 24158720, 'steps': 47184, 'batch_loss/train': 0.7053601071238518} +12/29/2021 04:11:55 - INFO - codeparrot_training - Step 47185: {'lr': 3.927638284696805e-06, 'samples': 24159232, 'steps': 47185, 'batch_loss/train': 0.6215266624931246} +12/29/2021 04:12:05 - INFO - codeparrot_training - Step 47186: {'lr': 3.924855596866145e-06, 'samples': 24159744, 'steps': 47186, 'batch_loss/train': 0.5711812344379723} +12/29/2021 04:12:16 - INFO - codeparrot_training - Step 47187: {'lr': 3.922073887337368e-06, 'samples': 24160256, 'steps': 47187, 'batch_loss/train': 0.7379737896844745} +12/29/2021 04:12:28 - INFO - codeparrot_training - Step 47188: {'lr': 3.919293156121523e-06, 'samples': 24160768, 'steps': 47188, 'batch_loss/train': 0.7831715629436076} +12/29/2021 04:12:39 - INFO - codeparrot_training - Step 47189: {'lr': 3.9165134032296265e-06, 'samples': 24161280, 'steps': 47189, 'batch_loss/train': 0.673020385322161} +12/29/2021 04:12:49 - INFO - codeparrot_training - Step 47190: {'lr': 3.913734628672782e-06, 'samples': 24161792, 'steps': 47190, 'batch_loss/train': 0.8260582545772195} +12/29/2021 04:13:02 - INFO - codeparrot_training - Step 47191: {'lr': 3.9109568324619536e-06, 'samples': 24162304, 'steps': 47191, 'batch_loss/train': 0.7316725673153996} +12/29/2021 04:13:12 - INFO - codeparrot_training - Step 47192: {'lr': 3.908180014608298e-06, 'samples': 24162816, 'steps': 47192, 'batch_loss/train': 0.5094256704906002} +12/29/2021 04:13:23 - INFO - codeparrot_training - Step 47193: {'lr': 3.905404175122779e-06, 'samples': 24163328, 'steps': 47193, 'batch_loss/train': 0.43209989252500236} +12/29/2021 04:13:34 - INFO - codeparrot_training - Step 47194: {'lr': 3.9026293140164426e-06, 'samples': 24163840, 'steps': 47194, 'batch_loss/train': 0.7553963102400303} +12/29/2021 04:13:46 - INFO - codeparrot_training - Step 47195: {'lr': 3.899855431300364e-06, 'samples': 24164352, 'steps': 47195, 'batch_loss/train': 0.6443294589407742} +12/29/2021 04:13:57 - INFO - codeparrot_training - Step 47196: {'lr': 3.8970825269855074e-06, 'samples': 24164864, 'steps': 47196, 'batch_loss/train': 0.5767926769331098} +12/29/2021 04:14:08 - INFO - codeparrot_training - Step 47197: {'lr': 3.894310601082918e-06, 'samples': 24165376, 'steps': 47197, 'batch_loss/train': 0.7491289428435266} +12/29/2021 04:14:20 - INFO - codeparrot_training - Step 47198: {'lr': 3.891539653603643e-06, 'samples': 24165888, 'steps': 47198, 'batch_loss/train': 0.7412739004939795} +12/29/2021 04:14:30 - INFO - codeparrot_training - Step 47199: {'lr': 3.888769684558674e-06, 'samples': 24166400, 'steps': 47199, 'batch_loss/train': 0.7844371376559138} +12/29/2021 04:14:41 - INFO - codeparrot_training - Step 47200: {'lr': 3.88600069395903e-06, 'samples': 24166912, 'steps': 47200, 'batch_loss/train': 0.5812663870747201} +12/29/2021 04:14:53 - INFO - codeparrot_training - Step 47201: {'lr': 3.883232681815729e-06, 'samples': 24167424, 'steps': 47201, 'batch_loss/train': 0.6843080339021981} +12/29/2021 04:15:04 - INFO - codeparrot_training - Step 47202: {'lr': 3.880465648139736e-06, 'samples': 24167936, 'steps': 47202, 'batch_loss/train': 0.570095396891702} +12/29/2021 04:15:14 - INFO - codeparrot_training - Step 47203: {'lr': 3.877699592942069e-06, 'samples': 24168448, 'steps': 47203, 'batch_loss/train': 0.6987306722439826} +12/29/2021 04:15:25 - INFO - codeparrot_training - Step 47204: {'lr': 3.8749345162338025e-06, 'samples': 24168960, 'steps': 47204, 'batch_loss/train': 0.6873667016625404} +12/29/2021 04:15:38 - INFO - codeparrot_training - Step 47205: {'lr': 3.872170418025789e-06, 'samples': 24169472, 'steps': 47205, 'batch_loss/train': 0.6869712881743908} +12/29/2021 04:15:48 - INFO - codeparrot_training - Step 47206: {'lr': 3.8694072983291305e-06, 'samples': 24169984, 'steps': 47206, 'batch_loss/train': 0.6928585779387504} +12/29/2021 04:15:59 - INFO - codeparrot_training - Step 47207: {'lr': 3.866645157154791e-06, 'samples': 24170496, 'steps': 47207, 'batch_loss/train': 0.7179375817067921} +12/29/2021 04:16:11 - INFO - codeparrot_training - Step 47208: {'lr': 3.863883994513706e-06, 'samples': 24171008, 'steps': 47208, 'batch_loss/train': 0.7155772187979892} +12/29/2021 04:16:22 - INFO - codeparrot_training - Step 47209: {'lr': 3.861123810416866e-06, 'samples': 24171520, 'steps': 47209, 'batch_loss/train': 0.6743462951853871} +12/29/2021 04:16:32 - INFO - codeparrot_training - Step 47210: {'lr': 3.858364604875292e-06, 'samples': 24172032, 'steps': 47210, 'batch_loss/train': 0.5889667025767267} +12/29/2021 04:16:45 - INFO - codeparrot_training - Step 47211: {'lr': 3.855606377899917e-06, 'samples': 24172544, 'steps': 47211, 'batch_loss/train': 0.5640150926192291} +12/29/2021 04:16:55 - INFO - codeparrot_training - Step 47212: {'lr': 3.8528491295016786e-06, 'samples': 24173056, 'steps': 47212, 'batch_loss/train': 0.6636685254052281} +12/29/2021 04:17:06 - INFO - codeparrot_training - Step 47213: {'lr': 3.850092859691595e-06, 'samples': 24173568, 'steps': 47213, 'batch_loss/train': 0.6463785246014595} +12/29/2021 04:17:18 - INFO - codeparrot_training - Step 47214: {'lr': 3.847337568480603e-06, 'samples': 24174080, 'steps': 47214, 'batch_loss/train': 0.7530632107518613} +12/29/2021 04:17:29 - INFO - codeparrot_training - Step 47215: {'lr': 3.844583255879636e-06, 'samples': 24174592, 'steps': 47215, 'batch_loss/train': 0.7119183982722461} +12/29/2021 04:17:40 - INFO - codeparrot_training - Step 47216: {'lr': 3.841829921899687e-06, 'samples': 24175104, 'steps': 47216, 'batch_loss/train': 0.7352344272658229} +12/29/2021 04:17:50 - INFO - codeparrot_training - Step 47217: {'lr': 3.839077566551663e-06, 'samples': 24175616, 'steps': 47217, 'batch_loss/train': 0.6709822658449411} +12/29/2021 04:18:03 - INFO - codeparrot_training - Step 47218: {'lr': 3.836326189846528e-06, 'samples': 24176128, 'steps': 47218, 'batch_loss/train': 0.6288231869693846} +12/29/2021 04:18:13 - INFO - codeparrot_training - Step 47219: {'lr': 3.833575791795218e-06, 'samples': 24176640, 'steps': 47219, 'batch_loss/train': 0.7060734434053302} +12/29/2021 04:18:24 - INFO - codeparrot_training - Step 47220: {'lr': 3.830826372408613e-06, 'samples': 24177152, 'steps': 47220, 'batch_loss/train': 0.6516873701475561} +12/29/2021 04:18:37 - INFO - codeparrot_training - Step 47221: {'lr': 3.82807793169776e-06, 'samples': 24177664, 'steps': 47221, 'batch_loss/train': 0.6230572260683402} +12/29/2021 04:18:47 - INFO - codeparrot_training - Step 47222: {'lr': 3.825330469673483e-06, 'samples': 24178176, 'steps': 47222, 'batch_loss/train': 0.560643781645922} +12/29/2021 04:18:58 - INFO - codeparrot_training - Step 47223: {'lr': 3.8225839863467446e-06, 'samples': 24178688, 'steps': 47223, 'batch_loss/train': 0.7661683949409053} +12/29/2021 04:19:10 - INFO - codeparrot_training - Step 47224: {'lr': 3.819838481728455e-06, 'samples': 24179200, 'steps': 47224, 'batch_loss/train': 0.7043728902935982} +12/29/2021 04:19:21 - INFO - codeparrot_training - Step 47225: {'lr': 3.81709395582952e-06, 'samples': 24179712, 'steps': 47225, 'batch_loss/train': 0.5233654241310433} +12/29/2021 04:19:31 - INFO - codeparrot_training - Step 47226: {'lr': 3.814350408660877e-06, 'samples': 24180224, 'steps': 47226, 'batch_loss/train': 0.7633447744883597} +12/29/2021 04:19:42 - INFO - codeparrot_training - Step 47227: {'lr': 3.811607840233461e-06, 'samples': 24180736, 'steps': 47227, 'batch_loss/train': 0.6265803137794137} +12/29/2021 04:19:54 - INFO - codeparrot_training - Step 47228: {'lr': 3.8088662505580683e-06, 'samples': 24181248, 'steps': 47228, 'batch_loss/train': 0.6010170562658459} +12/29/2021 04:20:05 - INFO - codeparrot_training - Step 47229: {'lr': 3.8061256396456913e-06, 'samples': 24181760, 'steps': 47229, 'batch_loss/train': 0.719105651602149} +12/29/2021 04:20:15 - INFO - codeparrot_training - Step 47230: {'lr': 3.803386007507237e-06, 'samples': 24182272, 'steps': 47230, 'batch_loss/train': 0.6331476159393787} +12/29/2021 04:20:28 - INFO - codeparrot_training - Step 47231: {'lr': 3.8006473541535024e-06, 'samples': 24182784, 'steps': 47231, 'batch_loss/train': 0.6616743094054982} +12/29/2021 04:20:38 - INFO - codeparrot_training - Step 47232: {'lr': 3.7979096795954516e-06, 'samples': 24183296, 'steps': 47232, 'batch_loss/train': 0.44032945472281426} +12/29/2021 04:20:49 - INFO - codeparrot_training - Step 47233: {'lr': 3.795172983843964e-06, 'samples': 24183808, 'steps': 47233, 'batch_loss/train': 0.7539029037579894} +12/29/2021 04:21:02 - INFO - codeparrot_training - Step 47234: {'lr': 3.7924372669098927e-06, 'samples': 24184320, 'steps': 47234, 'batch_loss/train': 0.5896135347138625} +12/29/2021 04:21:12 - INFO - codeparrot_training - Step 47235: {'lr': 3.7897025288041178e-06, 'samples': 24184832, 'steps': 47235, 'batch_loss/train': 0.6324143279343843} +12/29/2021 04:21:23 - INFO - codeparrot_training - Step 47236: {'lr': 3.7869687695375466e-06, 'samples': 24185344, 'steps': 47236, 'batch_loss/train': 0.69212706387043} +12/29/2021 04:21:35 - INFO - codeparrot_training - Step 47237: {'lr': 3.7842359891210043e-06, 'samples': 24185856, 'steps': 47237, 'batch_loss/train': 0.7784952083602548} +12/29/2021 04:21:46 - INFO - codeparrot_training - Step 47238: {'lr': 3.781504187565371e-06, 'samples': 24186368, 'steps': 47238, 'batch_loss/train': 0.6840322739444673} +12/29/2021 04:21:56 - INFO - codeparrot_training - Step 47239: {'lr': 3.7787733648814992e-06, 'samples': 24186880, 'steps': 47239, 'batch_loss/train': 0.6866912795230746} +12/29/2021 04:22:07 - INFO - codeparrot_training - Step 47240: {'lr': 3.7760435210802692e-06, 'samples': 24187392, 'steps': 47240, 'batch_loss/train': 0.6998932752758265} +12/29/2021 04:22:19 - INFO - codeparrot_training - Step 47241: {'lr': 3.7733146561725327e-06, 'samples': 24187904, 'steps': 47241, 'batch_loss/train': 0.7266497807577252} +12/29/2021 04:22:30 - INFO - codeparrot_training - Step 47242: {'lr': 3.7705867701691152e-06, 'samples': 24188416, 'steps': 47242, 'batch_loss/train': 0.5685857653152198} +12/29/2021 04:22:40 - INFO - codeparrot_training - Step 47243: {'lr': 3.7678598630808414e-06, 'samples': 24188928, 'steps': 47243, 'batch_loss/train': 0.6655755324754864} +12/29/2021 04:22:53 - INFO - codeparrot_training - Step 47244: {'lr': 3.7651339349186185e-06, 'samples': 24189440, 'steps': 47244, 'batch_loss/train': 0.7801166735589504} +12/29/2021 04:23:04 - INFO - codeparrot_training - Step 47245: {'lr': 3.762408985693272e-06, 'samples': 24189952, 'steps': 47245, 'batch_loss/train': 0.7430111337453127} +12/29/2021 04:23:15 - INFO - codeparrot_training - Step 47246: {'lr': 3.7596850154155427e-06, 'samples': 24190464, 'steps': 47246, 'batch_loss/train': 0.6942439517588355} +12/29/2021 04:23:27 - INFO - codeparrot_training - Step 47247: {'lr': 3.756962024096394e-06, 'samples': 24190976, 'steps': 47247, 'batch_loss/train': 0.7012124609900638} +12/29/2021 04:23:37 - INFO - codeparrot_training - Step 47248: {'lr': 3.75424001174654e-06, 'samples': 24191488, 'steps': 47248, 'batch_loss/train': 0.6186296441592276} +12/29/2021 04:23:48 - INFO - codeparrot_training - Step 47249: {'lr': 3.751518978376889e-06, 'samples': 24192000, 'steps': 47249, 'batch_loss/train': 0.6885399012826383} +12/29/2021 04:23:59 - INFO - codeparrot_training - Step 47250: {'lr': 3.7487989239981813e-06, 'samples': 24192512, 'steps': 47250, 'batch_loss/train': 0.6139397881925106} +12/29/2021 04:24:11 - INFO - codeparrot_training - Step 47251: {'lr': 3.74607984862127e-06, 'samples': 24193024, 'steps': 47251, 'batch_loss/train': 0.7142404166515917} +12/29/2021 04:24:22 - INFO - codeparrot_training - Step 47252: {'lr': 3.74336175225698e-06, 'samples': 24193536, 'steps': 47252, 'batch_loss/train': 0.7445079816970974} +12/29/2021 04:24:33 - INFO - codeparrot_training - Step 47253: {'lr': 3.7406446349161074e-06, 'samples': 24194048, 'steps': 47253, 'batch_loss/train': 0.6977691655047238} +12/29/2021 04:24:45 - INFO - codeparrot_training - Step 47254: {'lr': 3.737928496609422e-06, 'samples': 24194560, 'steps': 47254, 'batch_loss/train': 0.6591089325957} +12/29/2021 04:24:56 - INFO - codeparrot_training - Step 47255: {'lr': 3.735213337347776e-06, 'samples': 24195072, 'steps': 47255, 'batch_loss/train': 0.6774484000634402} +12/29/2021 04:25:06 - INFO - codeparrot_training - Step 47256: {'lr': 3.7324991571419386e-06, 'samples': 24195584, 'steps': 47256, 'batch_loss/train': 0.6494415230117738} +12/29/2021 04:25:18 - INFO - codeparrot_training - Step 47257: {'lr': 3.7297859560026793e-06, 'samples': 24196096, 'steps': 47257, 'batch_loss/train': 0.7633722499012947} +12/29/2021 04:25:29 - INFO - codeparrot_training - Step 47258: {'lr': 3.7270737339407946e-06, 'samples': 24196608, 'steps': 47258, 'batch_loss/train': 0.7206219085492194} +12/29/2021 04:25:40 - INFO - codeparrot_training - Step 47259: {'lr': 3.724362490967109e-06, 'samples': 24197120, 'steps': 47259, 'batch_loss/train': 0.576142790960148} +12/29/2021 04:25:52 - INFO - codeparrot_training - Step 47260: {'lr': 3.721652227092337e-06, 'samples': 24197632, 'steps': 47260, 'batch_loss/train': 0.6674540415406227} +12/29/2021 04:26:03 - INFO - codeparrot_training - Step 47261: {'lr': 3.7189429423272747e-06, 'samples': 24198144, 'steps': 47261, 'batch_loss/train': 0.7688306644558907} +12/29/2021 04:26:14 - INFO - codeparrot_training - Step 47262: {'lr': 3.716234636682747e-06, 'samples': 24198656, 'steps': 47262, 'batch_loss/train': 0.6928766835480928} +12/29/2021 04:26:24 - INFO - codeparrot_training - Step 47263: {'lr': 3.71352731016944e-06, 'samples': 24199168, 'steps': 47263, 'batch_loss/train': 0.7067977776750922} +12/29/2021 04:26:37 - INFO - codeparrot_training - Step 47264: {'lr': 3.7108209627981783e-06, 'samples': 24199680, 'steps': 47264, 'batch_loss/train': 0.7325578103773296} +12/29/2021 04:26:47 - INFO - codeparrot_training - Step 47265: {'lr': 3.708115594579675e-06, 'samples': 24200192, 'steps': 47265, 'batch_loss/train': 0.6213197708129883} +12/29/2021 04:26:58 - INFO - codeparrot_training - Step 47266: {'lr': 3.705411205524728e-06, 'samples': 24200704, 'steps': 47266, 'batch_loss/train': 0.7081809595692903} +12/29/2021 04:27:10 - INFO - codeparrot_training - Step 47267: {'lr': 3.70270779564405e-06, 'samples': 24201216, 'steps': 47267, 'batch_loss/train': 0.6970865987241268} +12/29/2021 04:27:21 - INFO - codeparrot_training - Step 47268: {'lr': 3.700005364948411e-06, 'samples': 24201728, 'steps': 47268, 'batch_loss/train': 0.7194316051900387} +12/29/2021 04:27:31 - INFO - codeparrot_training - Step 47269: {'lr': 3.697303913448552e-06, 'samples': 24202240, 'steps': 47269, 'batch_loss/train': 0.7526983125135303} +12/29/2021 04:27:43 - INFO - codeparrot_training - Step 47270: {'lr': 3.694603441155242e-06, 'samples': 24202752, 'steps': 47270, 'batch_loss/train': 0.6368338298052549} +12/29/2021 04:27:54 - INFO - codeparrot_training - Step 47271: {'lr': 3.691903948079167e-06, 'samples': 24203264, 'steps': 47271, 'batch_loss/train': 0.7296052994206548} +12/29/2021 04:28:05 - INFO - codeparrot_training - Step 47272: {'lr': 3.6892054342310687e-06, 'samples': 24203776, 'steps': 47272, 'batch_loss/train': 0.7155123390257359} +12/29/2021 04:28:17 - INFO - codeparrot_training - Step 47273: {'lr': 3.6865078996216885e-06, 'samples': 24204288, 'steps': 47273, 'batch_loss/train': 0.7327886531129479} +12/29/2021 04:28:28 - INFO - codeparrot_training - Step 47274: {'lr': 3.683811344261767e-06, 'samples': 24204800, 'steps': 47274, 'batch_loss/train': 0.7793607776984572} +12/29/2021 04:28:39 - INFO - codeparrot_training - Step 47275: {'lr': 3.681115768161991e-06, 'samples': 24205312, 'steps': 47275, 'batch_loss/train': 0.5983374619390815} +12/29/2021 04:28:49 - INFO - codeparrot_training - Step 47276: {'lr': 3.6784211713331017e-06, 'samples': 24205824, 'steps': 47276, 'batch_loss/train': 0.6710913763381541} +12/29/2021 04:29:01 - INFO - codeparrot_training - Step 47277: {'lr': 3.675727553785785e-06, 'samples': 24206336, 'steps': 47277, 'batch_loss/train': 0.735424904152751} +12/29/2021 04:29:12 - INFO - codeparrot_training - Step 47278: {'lr': 3.6730349155307817e-06, 'samples': 24206848, 'steps': 47278, 'batch_loss/train': 0.7096242355182767} +12/29/2021 04:29:23 - INFO - codeparrot_training - Step 47279: {'lr': 3.6703432565787785e-06, 'samples': 24207360, 'steps': 47279, 'batch_loss/train': 0.6883650552481413} +12/29/2021 04:29:35 - INFO - codeparrot_training - Step 47280: {'lr': 3.667652576940461e-06, 'samples': 24207872, 'steps': 47280, 'batch_loss/train': 0.7323253443464637} +12/29/2021 04:29:46 - INFO - codeparrot_training - Step 47281: {'lr': 3.6649628766265707e-06, 'samples': 24208384, 'steps': 47281, 'batch_loss/train': 0.7045056140050292} +12/29/2021 04:29:57 - INFO - codeparrot_training - Step 47282: {'lr': 3.6622741556477935e-06, 'samples': 24208896, 'steps': 47282, 'batch_loss/train': 0.6070936750620604} +12/29/2021 04:30:09 - INFO - codeparrot_training - Step 47283: {'lr': 3.6595864140147593e-06, 'samples': 24209408, 'steps': 47283, 'batch_loss/train': 0.742816879414022} +12/29/2021 04:30:19 - INFO - codeparrot_training - Step 47284: {'lr': 3.65689965173821e-06, 'samples': 24209920, 'steps': 47284, 'batch_loss/train': 0.6870803850470111} +12/29/2021 04:30:30 - INFO - codeparrot_training - Step 47285: {'lr': 3.654213868828832e-06, 'samples': 24210432, 'steps': 47285, 'batch_loss/train': 0.7164185456931591} +12/29/2021 04:30:42 - INFO - codeparrot_training - Step 47286: {'lr': 3.6515290652972543e-06, 'samples': 24210944, 'steps': 47286, 'batch_loss/train': 0.6654675554018468} +12/29/2021 04:30:53 - INFO - codeparrot_training - Step 47287: {'lr': 3.648845241154164e-06, 'samples': 24211456, 'steps': 47287, 'batch_loss/train': 0.6982426450704224} +12/29/2021 04:31:04 - INFO - codeparrot_training - Step 47288: {'lr': 3.646162396410302e-06, 'samples': 24211968, 'steps': 47288, 'batch_loss/train': 0.6567234382964671} +12/29/2021 04:31:14 - INFO - codeparrot_training - Step 47289: {'lr': 3.6434805310762433e-06, 'samples': 24212480, 'steps': 47289, 'batch_loss/train': 0.7421408388763666} +12/29/2021 04:31:27 - INFO - codeparrot_training - Step 47290: {'lr': 3.6407996451626733e-06, 'samples': 24212992, 'steps': 47290, 'batch_loss/train': 0.6724711034912616} +12/29/2021 04:31:38 - INFO - codeparrot_training - Step 47291: {'lr': 3.638119738680279e-06, 'samples': 24213504, 'steps': 47291, 'batch_loss/train': 0.6820691018365324} +12/29/2021 04:31:48 - INFO - codeparrot_training - Step 47292: {'lr': 3.6354408116397177e-06, 'samples': 24214016, 'steps': 47292, 'batch_loss/train': 0.6753245261497796} +12/29/2021 04:32:01 - INFO - codeparrot_training - Step 47293: {'lr': 3.632762864051592e-06, 'samples': 24214528, 'steps': 47293, 'batch_loss/train': 0.711485349573195} +12/29/2021 04:32:11 - INFO - codeparrot_training - Step 47294: {'lr': 3.630085895926588e-06, 'samples': 24215040, 'steps': 47294, 'batch_loss/train': 0.783533972222358} +12/29/2021 04:32:22 - INFO - codeparrot_training - Step 47295: {'lr': 3.6274099072753364e-06, 'samples': 24215552, 'steps': 47295, 'batch_loss/train': 0.6810733596794307} +12/29/2021 04:32:34 - INFO - codeparrot_training - Step 47296: {'lr': 3.624734898108467e-06, 'samples': 24216064, 'steps': 47296, 'batch_loss/train': 0.6544530743267387} +12/29/2021 04:32:45 - INFO - codeparrot_training - Step 47297: {'lr': 3.6220608684366387e-06, 'samples': 24216576, 'steps': 47297, 'batch_loss/train': 0.6516347592696548} +12/29/2021 04:32:55 - INFO - codeparrot_training - Step 47298: {'lr': 3.619387818270453e-06, 'samples': 24217088, 'steps': 47298, 'batch_loss/train': 0.7010706514120102} +12/29/2021 04:33:06 - INFO - codeparrot_training - Step 47299: {'lr': 3.6167157476205415e-06, 'samples': 24217600, 'steps': 47299, 'batch_loss/train': 0.7353264163248241} +12/29/2021 04:33:19 - INFO - codeparrot_training - Step 47300: {'lr': 3.6140446564975893e-06, 'samples': 24218112, 'steps': 47300, 'batch_loss/train': 0.6422019025776535} +12/29/2021 04:33:29 - INFO - codeparrot_training - Step 47301: {'lr': 3.611374544912116e-06, 'samples': 24218624, 'steps': 47301, 'batch_loss/train': 0.7129536317661405} +12/29/2021 04:33:40 - INFO - codeparrot_training - Step 47302: {'lr': 3.6087054128748076e-06, 'samples': 24219136, 'steps': 47302, 'batch_loss/train': 0.7072454784065485} +12/29/2021 04:33:52 - INFO - codeparrot_training - Step 47303: {'lr': 3.606037260396239e-06, 'samples': 24219648, 'steps': 47303, 'batch_loss/train': 0.5973624875769019} +12/29/2021 04:34:03 - INFO - codeparrot_training - Step 47304: {'lr': 3.6033700874870402e-06, 'samples': 24220160, 'steps': 47304, 'batch_loss/train': 0.7245312063023448} +12/29/2021 04:34:13 - INFO - codeparrot_training - Step 47305: {'lr': 3.600703894157786e-06, 'samples': 24220672, 'steps': 47305, 'batch_loss/train': 0.668110373429954} +12/29/2021 04:34:25 - INFO - codeparrot_training - Step 47306: {'lr': 3.598038680419108e-06, 'samples': 24221184, 'steps': 47306, 'batch_loss/train': 0.7178520569577813} +12/29/2021 04:34:36 - INFO - codeparrot_training - Step 47307: {'lr': 3.5953744462816073e-06, 'samples': 24221696, 'steps': 47307, 'batch_loss/train': 0.6568187063094229} +12/29/2021 04:34:47 - INFO - codeparrot_training - Step 47308: {'lr': 3.5927111917558596e-06, 'samples': 24222208, 'steps': 47308, 'batch_loss/train': 0.6132563529827166} +12/29/2021 04:34:59 - INFO - codeparrot_training - Step 47309: {'lr': 3.590048916852412e-06, 'samples': 24222720, 'steps': 47309, 'batch_loss/train': 0.7904037144035101} +12/29/2021 04:35:10 - INFO - codeparrot_training - Step 47310: {'lr': 3.5873876215819223e-06, 'samples': 24223232, 'steps': 47310, 'batch_loss/train': 0.624205077678198} +12/29/2021 04:35:21 - INFO - codeparrot_training - Step 47311: {'lr': 3.5847273059549657e-06, 'samples': 24223744, 'steps': 47311, 'batch_loss/train': 0.6517922348575667} +12/29/2021 04:35:31 - INFO - codeparrot_training - Step 47312: {'lr': 3.5820679699820336e-06, 'samples': 24224256, 'steps': 47312, 'batch_loss/train': 0.7139806197956204} +12/29/2021 04:35:44 - INFO - codeparrot_training - Step 47313: {'lr': 3.5794096136737563e-06, 'samples': 24224768, 'steps': 47313, 'batch_loss/train': 1.1459348504431546} +12/29/2021 04:35:55 - INFO - codeparrot_training - Step 47314: {'lr': 3.5767522370407644e-06, 'samples': 24225280, 'steps': 47314, 'batch_loss/train': 0.6364382595638745} +12/29/2021 04:36:05 - INFO - codeparrot_training - Step 47315: {'lr': 3.574095840093494e-06, 'samples': 24225792, 'steps': 47315, 'batch_loss/train': 0.7429183293133974} +12/29/2021 04:36:17 - INFO - codeparrot_training - Step 47316: {'lr': 3.5714404228425755e-06, 'samples': 24226304, 'steps': 47316, 'batch_loss/train': 0.6749840793199837} +12/29/2021 04:36:28 - INFO - codeparrot_training - Step 47317: {'lr': 3.5687859852986116e-06, 'samples': 24226816, 'steps': 47317, 'batch_loss/train': 0.6625751070678234} +12/29/2021 04:36:39 - INFO - codeparrot_training - Step 47318: {'lr': 3.566132527472066e-06, 'samples': 24227328, 'steps': 47318, 'batch_loss/train': 0.7392337219789624} +12/29/2021 04:36:49 - INFO - codeparrot_training - Step 47319: {'lr': 3.563480049373541e-06, 'samples': 24227840, 'steps': 47319, 'batch_loss/train': 0.7106340536847711} +12/29/2021 04:37:02 - INFO - codeparrot_training - Step 47320: {'lr': 3.560828551013584e-06, 'samples': 24228352, 'steps': 47320, 'batch_loss/train': 0.6102331556903664} +12/29/2021 04:37:13 - INFO - codeparrot_training - Step 47321: {'lr': 3.558178032402687e-06, 'samples': 24228864, 'steps': 47321, 'batch_loss/train': 0.8271698020398617} +12/29/2021 04:37:24 - INFO - codeparrot_training - Step 47322: {'lr': 3.555528493551452e-06, 'samples': 24229376, 'steps': 47322, 'batch_loss/train': 0.7283894188003615} +12/29/2021 04:37:36 - INFO - codeparrot_training - Step 47323: {'lr': 3.552879934470371e-06, 'samples': 24229888, 'steps': 47323, 'batch_loss/train': 0.6177813242538832} +12/29/2021 04:37:47 - INFO - codeparrot_training - Step 47324: {'lr': 3.550232355169963e-06, 'samples': 24230400, 'steps': 47324, 'batch_loss/train': 0.6368045061826706} +12/29/2021 04:37:57 - INFO - codeparrot_training - Step 47325: {'lr': 3.5475857556608037e-06, 'samples': 24230912, 'steps': 47325, 'batch_loss/train': 0.5988444038375746} +12/29/2021 04:38:10 - INFO - codeparrot_training - Step 47326: {'lr': 3.544940135953384e-06, 'samples': 24231424, 'steps': 47326, 'batch_loss/train': 0.6678384467959404} +12/29/2021 04:38:20 - INFO - codeparrot_training - Step 47327: {'lr': 3.5422954960581955e-06, 'samples': 24231936, 'steps': 47327, 'batch_loss/train': 0.6958401543088257} +12/29/2021 04:38:31 - INFO - codeparrot_training - Step 47328: {'lr': 3.539651835985813e-06, 'samples': 24232448, 'steps': 47328, 'batch_loss/train': 0.6591957253403962} +12/29/2021 04:38:44 - INFO - codeparrot_training - Step 47329: {'lr': 3.5370091557467286e-06, 'samples': 24232960, 'steps': 47329, 'batch_loss/train': 0.6833918554475531} +12/29/2021 04:38:54 - INFO - codeparrot_training - Step 47330: {'lr': 3.5343674553514062e-06, 'samples': 24233472, 'steps': 47330, 'batch_loss/train': 0.781298047862947} +12/29/2021 04:39:05 - INFO - codeparrot_training - Step 47331: {'lr': 3.5317267348103642e-06, 'samples': 24233984, 'steps': 47331, 'batch_loss/train': 0.6646168678998947} +12/29/2021 04:39:16 - INFO - codeparrot_training - Step 47332: {'lr': 3.529086994134151e-06, 'samples': 24234496, 'steps': 47332, 'batch_loss/train': 0.5109351000282913} +12/29/2021 04:39:28 - INFO - codeparrot_training - Step 47333: {'lr': 3.5264482333332013e-06, 'samples': 24235008, 'steps': 47333, 'batch_loss/train': 0.7399593945592642} +12/29/2021 04:39:38 - INFO - codeparrot_training - Step 47334: {'lr': 3.523810452418008e-06, 'samples': 24235520, 'steps': 47334, 'batch_loss/train': 0.7409965498372912} +12/29/2021 04:39:49 - INFO - codeparrot_training - Step 47335: {'lr': 3.5211736513991178e-06, 'samples': 24236032, 'steps': 47335, 'batch_loss/train': 0.7997591393068433} +12/29/2021 04:40:01 - INFO - codeparrot_training - Step 47336: {'lr': 3.5185378302869664e-06, 'samples': 24236544, 'steps': 47336, 'batch_loss/train': 0.6098683121963404} +12/29/2021 04:40:12 - INFO - codeparrot_training - Step 47337: {'lr': 3.5159029890920456e-06, 'samples': 24237056, 'steps': 47337, 'batch_loss/train': 0.7621386321261525} +12/29/2021 04:40:22 - INFO - codeparrot_training - Step 47338: {'lr': 3.513269127824792e-06, 'samples': 24237568, 'steps': 47338, 'batch_loss/train': 0.7695396542549133} +12/29/2021 04:40:35 - INFO - codeparrot_training - Step 47339: {'lr': 3.5106362464957242e-06, 'samples': 24238080, 'steps': 47339, 'batch_loss/train': 0.7289596593473107} +12/29/2021 04:40:46 - INFO - codeparrot_training - Step 47340: {'lr': 3.5080043451153342e-06, 'samples': 24238592, 'steps': 47340, 'batch_loss/train': 0.7792439768090844} +12/29/2021 04:40:56 - INFO - codeparrot_training - Step 47341: {'lr': 3.5053734236940027e-06, 'samples': 24239104, 'steps': 47341, 'batch_loss/train': 0.7959854500368237} +12/29/2021 04:41:08 - INFO - codeparrot_training - Step 47342: {'lr': 3.5027434822422213e-06, 'samples': 24239616, 'steps': 47342, 'batch_loss/train': 0.7296266419580206} +12/29/2021 04:41:19 - INFO - codeparrot_training - Step 47343: {'lr': 3.500114520770509e-06, 'samples': 24240128, 'steps': 47343, 'batch_loss/train': 0.7089239866472781} +12/29/2021 04:41:30 - INFO - codeparrot_training - Step 47344: {'lr': 3.4974865392892465e-06, 'samples': 24240640, 'steps': 47344, 'batch_loss/train': 0.7395188137888908} +12/29/2021 04:41:40 - INFO - codeparrot_training - Step 47345: {'lr': 3.4948595378088977e-06, 'samples': 24241152, 'steps': 47345, 'batch_loss/train': 0.7474234509281814} +12/29/2021 04:41:52 - INFO - codeparrot_training - Step 47346: {'lr': 3.492233516339899e-06, 'samples': 24241664, 'steps': 47346, 'batch_loss/train': 0.6502064494416118} +12/29/2021 04:42:03 - INFO - codeparrot_training - Step 47347: {'lr': 3.489608474892714e-06, 'samples': 24242176, 'steps': 47347, 'batch_loss/train': 0.6542672757059336} +12/29/2021 04:42:13 - INFO - codeparrot_training - Step 47348: {'lr': 3.4869844134777786e-06, 'samples': 24242688, 'steps': 47348, 'batch_loss/train': 0.6835815005470067} +12/29/2021 04:42:26 - INFO - codeparrot_training - Step 47349: {'lr': 3.4843613321055013e-06, 'samples': 24243200, 'steps': 47349, 'batch_loss/train': 0.7686041994020343} +12/29/2021 04:42:37 - INFO - codeparrot_training - Step 47350: {'lr': 3.4817392307863183e-06, 'samples': 24243712, 'steps': 47350, 'batch_loss/train': 0.7717043180018663} +12/29/2021 04:42:48 - INFO - codeparrot_training - Step 47351: {'lr': 3.4791181095306655e-06, 'samples': 24244224, 'steps': 47351, 'batch_loss/train': 0.6592451175674796} +12/29/2021 04:43:00 - INFO - codeparrot_training - Step 47352: {'lr': 3.4764979683489517e-06, 'samples': 24244736, 'steps': 47352, 'batch_loss/train': 0.7336127692833543} +12/29/2021 04:43:10 - INFO - codeparrot_training - Step 47353: {'lr': 3.4738788072515847e-06, 'samples': 24245248, 'steps': 47353, 'batch_loss/train': 0.660784785519354} +12/29/2021 04:43:21 - INFO - codeparrot_training - Step 47354: {'lr': 3.4712606262490288e-06, 'samples': 24245760, 'steps': 47354, 'batch_loss/train': 0.931402076035738} +12/29/2021 04:43:32 - INFO - codeparrot_training - Step 47355: {'lr': 3.4686434253516364e-06, 'samples': 24246272, 'steps': 47355, 'batch_loss/train': 0.5672658355906606} +12/29/2021 04:43:44 - INFO - codeparrot_training - Step 47356: {'lr': 3.466027204569816e-06, 'samples': 24246784, 'steps': 47356, 'batch_loss/train': 0.6292000162648037} +12/29/2021 04:43:55 - INFO - codeparrot_training - Step 47357: {'lr': 3.4634119639140036e-06, 'samples': 24247296, 'steps': 47357, 'batch_loss/train': 0.8206584574654698} +12/29/2021 04:44:05 - INFO - codeparrot_training - Step 47358: {'lr': 3.4607977033945803e-06, 'samples': 24247808, 'steps': 47358, 'batch_loss/train': 0.6385585833340883} +12/29/2021 04:44:18 - INFO - codeparrot_training - Step 47359: {'lr': 3.4581844230218985e-06, 'samples': 24248320, 'steps': 47359, 'batch_loss/train': 0.6754618673585355} +12/29/2021 04:44:29 - INFO - codeparrot_training - Step 47360: {'lr': 3.455572122806422e-06, 'samples': 24248832, 'steps': 47360, 'batch_loss/train': 0.7645040974020958} +12/29/2021 04:44:39 - INFO - codeparrot_training - Step 47361: {'lr': 3.4529608027584758e-06, 'samples': 24249344, 'steps': 47361, 'batch_loss/train': 0.748470320366323} +12/29/2021 04:44:50 - INFO - codeparrot_training - Step 47362: {'lr': 3.450350462888496e-06, 'samples': 24249856, 'steps': 47362, 'batch_loss/train': 1.0374248418956995} +12/29/2021 04:45:02 - INFO - codeparrot_training - Step 47363: {'lr': 3.4477411032068083e-06, 'samples': 24250368, 'steps': 47363, 'batch_loss/train': 0.7519551294390112} +12/29/2021 04:45:13 - INFO - codeparrot_training - Step 47364: {'lr': 3.4451327237237927e-06, 'samples': 24250880, 'steps': 47364, 'batch_loss/train': 0.8004484013654292} +12/29/2021 04:45:23 - INFO - codeparrot_training - Step 47365: {'lr': 3.442525324449858e-06, 'samples': 24251392, 'steps': 47365, 'batch_loss/train': 0.7202921099960804} +12/29/2021 04:45:36 - INFO - codeparrot_training - Step 47366: {'lr': 3.439918905395356e-06, 'samples': 24251904, 'steps': 47366, 'batch_loss/train': 0.6796971205621958} +12/29/2021 04:45:47 - INFO - codeparrot_training - Step 47367: {'lr': 3.437313466570613e-06, 'samples': 24252416, 'steps': 47367, 'batch_loss/train': 0.5687627855222672} +12/29/2021 04:45:57 - INFO - codeparrot_training - Step 47368: {'lr': 3.4347090079860087e-06, 'samples': 24252928, 'steps': 47368, 'batch_loss/train': 0.7816730858758092} +12/29/2021 04:46:10 - INFO - codeparrot_training - Step 47369: {'lr': 3.432105529651924e-06, 'samples': 24253440, 'steps': 47369, 'batch_loss/train': 0.7070361441001296} +12/29/2021 04:46:20 - INFO - codeparrot_training - Step 47370: {'lr': 3.4295030315786845e-06, 'samples': 24253952, 'steps': 47370, 'batch_loss/train': 0.8323926543816924} +12/29/2021 04:46:31 - INFO - codeparrot_training - Step 47371: {'lr': 3.4269015137766424e-06, 'samples': 24254464, 'steps': 47371, 'batch_loss/train': 0.7274833684787154} +12/29/2021 04:46:43 - INFO - codeparrot_training - Step 47372: {'lr': 3.424300976256123e-06, 'samples': 24254976, 'steps': 47372, 'batch_loss/train': 0.7177276965230703} +12/29/2021 04:46:54 - INFO - codeparrot_training - Step 47373: {'lr': 3.4217014190274785e-06, 'samples': 24255488, 'steps': 47373, 'batch_loss/train': 0.4911770903854631} +12/29/2021 04:47:04 - INFO - codeparrot_training - Step 47374: {'lr': 3.419102842101035e-06, 'samples': 24256000, 'steps': 47374, 'batch_loss/train': 0.6299622470978647} +12/29/2021 04:47:15 - INFO - codeparrot_training - Step 47375: {'lr': 3.4165052454871724e-06, 'samples': 24256512, 'steps': 47375, 'batch_loss/train': 0.5830975968856364} +12/29/2021 04:47:28 - INFO - codeparrot_training - Step 47376: {'lr': 3.413908629196133e-06, 'samples': 24257024, 'steps': 47376, 'batch_loss/train': 0.7447493094950914} +12/29/2021 04:47:38 - INFO - codeparrot_training - Step 47377: {'lr': 3.411312993238325e-06, 'samples': 24257536, 'steps': 47377, 'batch_loss/train': 0.7465155941899866} +12/29/2021 04:47:49 - INFO - codeparrot_training - Step 47378: {'lr': 3.4087183376240172e-06, 'samples': 24258048, 'steps': 47378, 'batch_loss/train': 0.6542116799391806} +12/29/2021 04:48:01 - INFO - codeparrot_training - Step 47379: {'lr': 3.4061246623635078e-06, 'samples': 24258560, 'steps': 47379, 'batch_loss/train': 0.6700461776927114} +12/29/2021 04:48:12 - INFO - codeparrot_training - Step 47380: {'lr': 3.4035319674671494e-06, 'samples': 24259072, 'steps': 47380, 'batch_loss/train': 0.7872187143657357} +12/29/2021 04:48:22 - INFO - codeparrot_training - Step 47381: {'lr': 3.400940252945267e-06, 'samples': 24259584, 'steps': 47381, 'batch_loss/train': 0.6968765817582607} +12/29/2021 04:48:34 - INFO - codeparrot_training - Step 47382: {'lr': 3.3983495188081304e-06, 'samples': 24260096, 'steps': 47382, 'batch_loss/train': 0.7407632877584547} +12/29/2021 04:48:45 - INFO - codeparrot_training - Step 47383: {'lr': 3.3957597650660087e-06, 'samples': 24260608, 'steps': 47383, 'batch_loss/train': 0.5421847022371367} +12/29/2021 04:48:56 - INFO - codeparrot_training - Step 47384: {'lr': 3.3931709917292554e-06, 'samples': 24261120, 'steps': 47384, 'batch_loss/train': 0.6902845441363752} +12/29/2021 04:49:08 - INFO - codeparrot_training - Step 47385: {'lr': 3.3905831988081393e-06, 'samples': 24261632, 'steps': 47385, 'batch_loss/train': 0.5947052667033859} +12/29/2021 04:49:19 - INFO - codeparrot_training - Step 47386: {'lr': 3.38799638631293e-06, 'samples': 24262144, 'steps': 47386, 'batch_loss/train': 0.6015667754109018} +12/29/2021 04:49:29 - INFO - codeparrot_training - Step 47387: {'lr': 3.385410554253954e-06, 'samples': 24262656, 'steps': 47387, 'batch_loss/train': 0.6622202079743147} +12/29/2021 04:49:40 - INFO - codeparrot_training - Step 47388: {'lr': 3.382825702641451e-06, 'samples': 24263168, 'steps': 47388, 'batch_loss/train': 0.7278862232342362} +12/29/2021 04:49:52 - INFO - codeparrot_training - Step 47389: {'lr': 3.3802418314857197e-06, 'samples': 24263680, 'steps': 47389, 'batch_loss/train': 0.7895924118347466} +12/29/2021 04:50:03 - INFO - codeparrot_training - Step 47390: {'lr': 3.3776589407970017e-06, 'samples': 24264192, 'steps': 47390, 'batch_loss/train': 0.680938511621207} +12/29/2021 04:50:14 - INFO - codeparrot_training - Step 47391: {'lr': 3.3750770305856216e-06, 'samples': 24264704, 'steps': 47391, 'batch_loss/train': 0.8035631151869893} +12/29/2021 04:50:26 - INFO - codeparrot_training - Step 47392: {'lr': 3.3724961008618214e-06, 'samples': 24265216, 'steps': 47392, 'batch_loss/train': 0.6689043997321278} +12/29/2021 04:50:36 - INFO - codeparrot_training - Step 47393: {'lr': 3.3699161516358434e-06, 'samples': 24265728, 'steps': 47393, 'batch_loss/train': 0.6438220572890714} +12/29/2021 04:50:47 - INFO - codeparrot_training - Step 47394: {'lr': 3.367337182917929e-06, 'samples': 24266240, 'steps': 47394, 'batch_loss/train': 0.8062619240954518} +12/29/2021 04:50:59 - INFO - codeparrot_training - Step 47395: {'lr': 3.364759194718403e-06, 'samples': 24266752, 'steps': 47395, 'batch_loss/train': 0.6999079901725054} +12/29/2021 04:51:10 - INFO - codeparrot_training - Step 47396: {'lr': 3.3621821870474246e-06, 'samples': 24267264, 'steps': 47396, 'batch_loss/train': 0.8516150889918208} +12/29/2021 04:51:21 - INFO - codeparrot_training - Step 47397: {'lr': 3.359606159915318e-06, 'samples': 24267776, 'steps': 47397, 'batch_loss/train': 0.7552501063328236} +12/29/2021 04:51:31 - INFO - codeparrot_training - Step 47398: {'lr': 3.3570311133322706e-06, 'samples': 24268288, 'steps': 47398, 'batch_loss/train': 0.49993882002308965} +12/29/2021 04:51:44 - INFO - codeparrot_training - Step 47399: {'lr': 3.354457047308551e-06, 'samples': 24268800, 'steps': 47399, 'batch_loss/train': 0.6501034717075527} +12/29/2021 04:51:55 - INFO - codeparrot_training - Step 47400: {'lr': 3.351883961854346e-06, 'samples': 24269312, 'steps': 47400, 'batch_loss/train': 0.569538950570859} +12/29/2021 04:52:05 - INFO - codeparrot_training - Step 47401: {'lr': 3.349311856979953e-06, 'samples': 24269824, 'steps': 47401, 'batch_loss/train': 0.814878772944212} +12/29/2021 04:52:17 - INFO - codeparrot_training - Step 47402: {'lr': 3.346740732695558e-06, 'samples': 24270336, 'steps': 47402, 'batch_loss/train': 0.6167961825849488} +12/29/2021 04:52:28 - INFO - codeparrot_training - Step 47403: {'lr': 3.344170589011375e-06, 'samples': 24270848, 'steps': 47403, 'batch_loss/train': 0.6701685378793627} +12/29/2021 04:52:38 - INFO - codeparrot_training - Step 47404: {'lr': 3.341601425937674e-06, 'samples': 24271360, 'steps': 47404, 'batch_loss/train': 0.6976188458502293} +12/29/2021 04:52:51 - INFO - codeparrot_training - Step 47405: {'lr': 3.339033243484585e-06, 'samples': 24271872, 'steps': 47405, 'batch_loss/train': 0.6591645264998078} +12/29/2021 04:53:02 - INFO - codeparrot_training - Step 47406: {'lr': 3.336466041662378e-06, 'samples': 24272384, 'steps': 47406, 'batch_loss/train': 0.7508860463276505} +12/29/2021 04:53:12 - INFO - codeparrot_training - Step 47407: {'lr': 3.3338998204812678e-06, 'samples': 24272896, 'steps': 47407, 'batch_loss/train': 0.708690736675635} +12/29/2021 04:53:26 - INFO - codeparrot_training - Step 47408: {'lr': 3.3313345799514394e-06, 'samples': 24273408, 'steps': 47408, 'batch_loss/train': 0.7108374675735831} +12/29/2021 04:53:36 - INFO - codeparrot_training - Step 47409: {'lr': 3.328770320083052e-06, 'samples': 24273920, 'steps': 47409, 'batch_loss/train': 1.6463190375361592} +12/29/2021 04:53:47 - INFO - codeparrot_training - Step 47410: {'lr': 3.3262070408863477e-06, 'samples': 24274432, 'steps': 47410, 'batch_loss/train': 1.0959950685501099} +12/29/2021 04:53:58 - INFO - codeparrot_training - Step 47411: {'lr': 3.323644742371512e-06, 'samples': 24274944, 'steps': 47411, 'batch_loss/train': 1.0180057636462152} +12/29/2021 04:54:10 - INFO - codeparrot_training - Step 47412: {'lr': 3.3210834245487044e-06, 'samples': 24275456, 'steps': 47412, 'batch_loss/train': 1.316737812012434} +12/29/2021 04:54:20 - INFO - codeparrot_training - Step 47413: {'lr': 3.318523087428138e-06, 'samples': 24275968, 'steps': 47413, 'batch_loss/train': 0.8171672141179442} +12/29/2021 04:54:31 - INFO - codeparrot_training - Step 47414: {'lr': 3.315963731019972e-06, 'samples': 24276480, 'steps': 47414, 'batch_loss/train': 0.723424123832956} +12/29/2021 04:54:43 - INFO - codeparrot_training - Step 47415: {'lr': 3.31340535533442e-06, 'samples': 24276992, 'steps': 47415, 'batch_loss/train': 0.6710909595713019} +12/29/2021 04:54:54 - INFO - codeparrot_training - Step 47416: {'lr': 3.3108479603815854e-06, 'samples': 24277504, 'steps': 47416, 'batch_loss/train': 0.7305356496945024} +12/29/2021 04:55:05 - INFO - codeparrot_training - Step 47417: {'lr': 3.3082915461716823e-06, 'samples': 24278016, 'steps': 47417, 'batch_loss/train': 0.7814771635457873} +12/29/2021 04:55:17 - INFO - codeparrot_training - Step 47418: {'lr': 3.3057361127148967e-06, 'samples': 24278528, 'steps': 47418, 'batch_loss/train': 0.6795236342586577} +12/29/2021 04:55:28 - INFO - codeparrot_training - Step 47419: {'lr': 3.3031816600213316e-06, 'samples': 24279040, 'steps': 47419, 'batch_loss/train': 0.6844354132190347} +12/29/2021 04:55:38 - INFO - codeparrot_training - Step 47420: {'lr': 3.300628188101146e-06, 'samples': 24279552, 'steps': 47420, 'batch_loss/train': 0.5346465120092034} +12/29/2021 04:55:49 - INFO - codeparrot_training - Step 47421: {'lr': 3.298075696964553e-06, 'samples': 24280064, 'steps': 47421, 'batch_loss/train': 0.6446169186383486} +12/29/2021 04:56:01 - INFO - codeparrot_training - Step 47422: {'lr': 3.295524186621629e-06, 'samples': 24280576, 'steps': 47422, 'batch_loss/train': 0.7506633764132857} +12/29/2021 04:56:12 - INFO - codeparrot_training - Step 47423: {'lr': 3.2929736570825596e-06, 'samples': 24281088, 'steps': 47423, 'batch_loss/train': 0.6609505680389702} +12/29/2021 04:56:22 - INFO - codeparrot_training - Step 47424: {'lr': 3.290424108357476e-06, 'samples': 24281600, 'steps': 47424, 'batch_loss/train': 0.7310629525454715} +12/29/2021 04:56:35 - INFO - codeparrot_training - Step 47425: {'lr': 3.2878755404565085e-06, 'samples': 24282112, 'steps': 47425, 'batch_loss/train': 0.7540103560313582} +12/29/2021 04:56:45 - INFO - codeparrot_training - Step 47426: {'lr': 3.285327953389816e-06, 'samples': 24282624, 'steps': 47426, 'batch_loss/train': 0.7536340653896332} +12/29/2021 04:56:56 - INFO - codeparrot_training - Step 47427: {'lr': 3.282781347167474e-06, 'samples': 24283136, 'steps': 47427, 'batch_loss/train': 0.692338238004595} +12/29/2021 04:57:06 - INFO - codeparrot_training - Step 47428: {'lr': 3.2802357217996404e-06, 'samples': 24283648, 'steps': 47428, 'batch_loss/train': 0.6649408161174506} +12/29/2021 04:57:19 - INFO - codeparrot_training - Step 47429: {'lr': 3.2776910772964464e-06, 'samples': 24284160, 'steps': 47429, 'batch_loss/train': 0.6007395935012028} +12/29/2021 04:57:30 - INFO - codeparrot_training - Step 47430: {'lr': 3.275147413667995e-06, 'samples': 24284672, 'steps': 47430, 'batch_loss/train': 0.6412005796155427} +12/29/2021 04:57:40 - INFO - codeparrot_training - Step 47431: {'lr': 3.2726047309243614e-06, 'samples': 24285184, 'steps': 47431, 'batch_loss/train': 0.7334294328466058} +12/29/2021 04:57:53 - INFO - codeparrot_training - Step 47432: {'lr': 3.270063029075732e-06, 'samples': 24285696, 'steps': 47432, 'batch_loss/train': 0.549523315159604} +12/29/2021 04:58:03 - INFO - codeparrot_training - Step 47433: {'lr': 3.2675223081321537e-06, 'samples': 24286208, 'steps': 47433, 'batch_loss/train': 0.6747697149403393} +12/29/2021 04:58:14 - INFO - codeparrot_training - Step 47434: {'lr': 3.264982568103758e-06, 'samples': 24286720, 'steps': 47434, 'batch_loss/train': 0.7185896849259734} +12/29/2021 04:58:28 - INFO - codeparrot_training - Step 47435: {'lr': 3.2624438090005924e-06, 'samples': 24287232, 'steps': 47435, 'batch_loss/train': 0.6826984889339656} +12/29/2021 04:58:39 - INFO - codeparrot_training - Step 47436: {'lr': 3.2599060308328155e-06, 'samples': 24287744, 'steps': 47436, 'batch_loss/train': 0.6499341670423746} +12/29/2021 04:58:49 - INFO - codeparrot_training - Step 47437: {'lr': 3.2573692336104745e-06, 'samples': 24288256, 'steps': 47437, 'batch_loss/train': 0.8947087964043021} +12/29/2021 04:59:02 - INFO - codeparrot_training - Step 47438: {'lr': 3.2548334173436723e-06, 'samples': 24288768, 'steps': 47438, 'batch_loss/train': 0.6300102039240301} +12/29/2021 04:59:12 - INFO - codeparrot_training - Step 47439: {'lr': 3.2522985820424845e-06, 'samples': 24289280, 'steps': 47439, 'batch_loss/train': 0.8181042142678052} +12/29/2021 04:59:23 - INFO - codeparrot_training - Step 47440: {'lr': 3.249764727716986e-06, 'samples': 24289792, 'steps': 47440, 'batch_loss/train': 0.797463639639318} +12/29/2021 04:59:33 - INFO - codeparrot_training - Step 47441: {'lr': 3.2472318543772805e-06, 'samples': 24290304, 'steps': 47441, 'batch_loss/train': 0.8428957457654178} +12/29/2021 04:59:46 - INFO - codeparrot_training - Step 47442: {'lr': 3.244699962033387e-06, 'samples': 24290816, 'steps': 47442, 'batch_loss/train': 0.6830023573711514} +12/29/2021 04:59:56 - INFO - codeparrot_training - Step 47443: {'lr': 3.2421690506953817e-06, 'samples': 24291328, 'steps': 47443, 'batch_loss/train': 0.7102387580089271} +12/29/2021 05:00:07 - INFO - codeparrot_training - Step 47444: {'lr': 3.2396391203733665e-06, 'samples': 24291840, 'steps': 47444, 'batch_loss/train': 0.775171248242259} +12/29/2021 05:00:21 - INFO - codeparrot_training - Step 47445: {'lr': 3.237110171077362e-06, 'samples': 24292352, 'steps': 47445, 'batch_loss/train': 0.7781139467842877} +12/29/2021 05:00:31 - INFO - codeparrot_training - Step 47446: {'lr': 3.2345822028174155e-06, 'samples': 24292864, 'steps': 47446, 'batch_loss/train': 0.7223774739541113} +12/29/2021 05:00:42 - INFO - codeparrot_training - Step 47447: {'lr': 3.2320552156036298e-06, 'samples': 24293376, 'steps': 47447, 'batch_loss/train': 0.7030103206634521} +12/29/2021 05:00:54 - INFO - codeparrot_training - Step 47448: {'lr': 3.2295292094459973e-06, 'samples': 24293888, 'steps': 47448, 'batch_loss/train': 0.7095646522939205} +12/29/2021 05:01:05 - INFO - codeparrot_training - Step 47449: {'lr': 3.227004184354565e-06, 'samples': 24294400, 'steps': 47449, 'batch_loss/train': 0.6754389866255224} +12/29/2021 05:01:15 - INFO - codeparrot_training - Step 47450: {'lr': 3.2244801403394363e-06, 'samples': 24294912, 'steps': 47450, 'batch_loss/train': 0.6968292728997767} +12/29/2021 05:01:29 - INFO - codeparrot_training - Step 47451: {'lr': 3.221957077410548e-06, 'samples': 24295424, 'steps': 47451, 'batch_loss/train': 0.6676771151833236} +12/29/2021 05:01:40 - INFO - codeparrot_training - Step 47452: {'lr': 3.219434995578002e-06, 'samples': 24295936, 'steps': 47452, 'batch_loss/train': 0.6464381862897426} +12/29/2021 05:01:51 - INFO - codeparrot_training - Step 47453: {'lr': 3.216913894851792e-06, 'samples': 24296448, 'steps': 47453, 'batch_loss/train': 0.7092203111387789} +12/29/2021 05:02:01 - INFO - codeparrot_training - Step 47454: {'lr': 3.214393775241936e-06, 'samples': 24296960, 'steps': 47454, 'batch_loss/train': 0.7742323046550155} +12/29/2021 05:02:13 - INFO - codeparrot_training - Step 47455: {'lr': 3.211874636758483e-06, 'samples': 24297472, 'steps': 47455, 'batch_loss/train': 0.7615192933008075} +12/29/2021 05:02:24 - INFO - codeparrot_training - Step 47456: {'lr': 3.2093564794114517e-06, 'samples': 24297984, 'steps': 47456, 'batch_loss/train': 0.906210379791446} +12/29/2021 05:02:35 - INFO - codeparrot_training - Step 47457: {'lr': 3.206839303210807e-06, 'samples': 24298496, 'steps': 47457, 'batch_loss/train': 0.704715080326423} +12/29/2021 05:02:47 - INFO - codeparrot_training - Step 47458: {'lr': 3.2043231081665956e-06, 'samples': 24299008, 'steps': 47458, 'batch_loss/train': 0.716948383487761} +12/29/2021 05:02:57 - INFO - codeparrot_training - Step 47459: {'lr': 3.2018078942888107e-06, 'samples': 24299520, 'steps': 47459, 'batch_loss/train': 0.7437311722896993} +12/29/2021 05:03:08 - INFO - codeparrot_training - Step 47460: {'lr': 3.1992936615874434e-06, 'samples': 24300032, 'steps': 47460, 'batch_loss/train': 0.7491009393706918} +12/29/2021 05:03:22 - INFO - codeparrot_training - Step 47461: {'lr': 3.196780410072486e-06, 'samples': 24300544, 'steps': 47461, 'batch_loss/train': 0.7415916176396422} +12/29/2021 05:03:32 - INFO - codeparrot_training - Step 47462: {'lr': 3.1942681397539586e-06, 'samples': 24301056, 'steps': 47462, 'batch_loss/train': 0.7177171325311065} +12/29/2021 05:03:43 - INFO - codeparrot_training - Step 47463: {'lr': 3.191756850641825e-06, 'samples': 24301568, 'steps': 47463, 'batch_loss/train': 0.8698350293561816} +12/29/2021 05:03:54 - INFO - codeparrot_training - Step 47464: {'lr': 3.189246542746077e-06, 'samples': 24302080, 'steps': 47464, 'batch_loss/train': 0.8126755002886057} +12/29/2021 05:04:06 - INFO - codeparrot_training - Step 47465: {'lr': 3.186737216076707e-06, 'samples': 24302592, 'steps': 47465, 'batch_loss/train': 0.7420995645225048} +12/29/2021 05:04:16 - INFO - codeparrot_training - Step 47466: {'lr': 3.1842288706436517e-06, 'samples': 24303104, 'steps': 47466, 'batch_loss/train': 0.7085259594023228} +12/29/2021 05:04:27 - INFO - codeparrot_training - Step 47467: {'lr': 3.1817215064569025e-06, 'samples': 24303616, 'steps': 47467, 'batch_loss/train': 0.6840747549431399} +12/29/2021 05:04:41 - INFO - codeparrot_training - Step 47468: {'lr': 3.179215123526452e-06, 'samples': 24304128, 'steps': 47468, 'batch_loss/train': 0.7605212684720755} +12/29/2021 05:04:52 - INFO - codeparrot_training - Step 47469: {'lr': 3.1767097218622364e-06, 'samples': 24304640, 'steps': 47469, 'batch_loss/train': 0.7590699037536979} +12/29/2021 05:05:02 - INFO - codeparrot_training - Step 47470: {'lr': 3.174205301474248e-06, 'samples': 24305152, 'steps': 47470, 'batch_loss/train': 0.5958257112361025} +12/29/2021 05:05:14 - INFO - codeparrot_training - Step 47471: {'lr': 3.1717018623724224e-06, 'samples': 24305664, 'steps': 47471, 'batch_loss/train': 0.5508757452480495} +12/29/2021 05:05:25 - INFO - codeparrot_training - Step 47472: {'lr': 3.1691994045666693e-06, 'samples': 24306176, 'steps': 47472, 'batch_loss/train': 0.5973161019501276} +12/29/2021 05:05:36 - INFO - codeparrot_training - Step 47473: {'lr': 3.166697928067036e-06, 'samples': 24306688, 'steps': 47473, 'batch_loss/train': 0.7601380879059434} +12/29/2021 05:05:46 - INFO - codeparrot_training - Step 47474: {'lr': 3.1641974328833755e-06, 'samples': 24307200, 'steps': 47474, 'batch_loss/train': 0.6094368319027126} +12/29/2021 05:05:58 - INFO - codeparrot_training - Step 47475: {'lr': 3.1616979190256523e-06, 'samples': 24307712, 'steps': 47475, 'batch_loss/train': 0.7096839984878898} +12/29/2021 05:06:09 - INFO - codeparrot_training - Step 47476: {'lr': 3.1591993865038303e-06, 'samples': 24308224, 'steps': 47476, 'batch_loss/train': 0.771985637024045} +12/29/2021 05:06:20 - INFO - codeparrot_training - Step 47477: {'lr': 3.1567018353278464e-06, 'samples': 24308736, 'steps': 47477, 'batch_loss/train': 0.718559896107763} +12/29/2021 05:06:32 - INFO - codeparrot_training - Step 47478: {'lr': 3.1542052655075813e-06, 'samples': 24309248, 'steps': 47478, 'batch_loss/train': 0.655148749705404} +12/29/2021 05:06:42 - INFO - codeparrot_training - Step 47479: {'lr': 3.151709677053e-06, 'samples': 24309760, 'steps': 47479, 'batch_loss/train': 0.7883134530857205} +12/29/2021 05:06:53 - INFO - codeparrot_training - Step 47480: {'lr': 3.1492150699740373e-06, 'samples': 24310272, 'steps': 47480, 'batch_loss/train': 0.7488402351737022} +12/29/2021 05:07:07 - INFO - codeparrot_training - Step 47481: {'lr': 3.146721444280548e-06, 'samples': 24310784, 'steps': 47481, 'batch_loss/train': 0.8242170724552125} +12/29/2021 05:07:18 - INFO - codeparrot_training - Step 47482: {'lr': 3.1442287999825238e-06, 'samples': 24311296, 'steps': 47482, 'batch_loss/train': 0.685222020605579} +12/29/2021 05:07:28 - INFO - codeparrot_training - Step 47483: {'lr': 3.1417371370898175e-06, 'samples': 24311808, 'steps': 47483, 'batch_loss/train': 0.7438780744560063} +12/29/2021 05:07:39 - INFO - codeparrot_training - Step 47484: {'lr': 3.139246455612366e-06, 'samples': 24312320, 'steps': 47484, 'batch_loss/train': 0.7551612888928503} +12/29/2021 05:07:51 - INFO - codeparrot_training - Step 47485: {'lr': 3.1367567555600494e-06, 'samples': 24312832, 'steps': 47485, 'batch_loss/train': 0.7747432810720056} +12/29/2021 05:08:01 - INFO - codeparrot_training - Step 47486: {'lr': 3.134268036942778e-06, 'samples': 24313344, 'steps': 47486, 'batch_loss/train': 0.7017998951487243} +12/29/2021 05:08:12 - INFO - codeparrot_training - Step 47487: {'lr': 3.1317802997704316e-06, 'samples': 24313856, 'steps': 47487, 'batch_loss/train': 0.6414475687779486} +12/29/2021 05:08:24 - INFO - codeparrot_training - Step 47488: {'lr': 3.129293544052947e-06, 'samples': 24314368, 'steps': 47488, 'batch_loss/train': 0.6384420916438103} +12/29/2021 05:08:35 - INFO - codeparrot_training - Step 47489: {'lr': 3.1268077698001497e-06, 'samples': 24314880, 'steps': 47489, 'batch_loss/train': 0.7039547557942569} +12/29/2021 05:08:46 - INFO - codeparrot_training - Step 47490: {'lr': 3.1243229770219484e-06, 'samples': 24315392, 'steps': 47490, 'batch_loss/train': 0.6415463690645993} +12/29/2021 05:08:59 - INFO - codeparrot_training - Step 47491: {'lr': 3.1218391657282243e-06, 'samples': 24315904, 'steps': 47491, 'batch_loss/train': 0.7908311244100332} +12/29/2021 05:09:10 - INFO - codeparrot_training - Step 47492: {'lr': 3.119356335928858e-06, 'samples': 24316416, 'steps': 47492, 'batch_loss/train': 0.7718366426415741} +12/29/2021 05:09:21 - INFO - codeparrot_training - Step 47493: {'lr': 3.116874487633703e-06, 'samples': 24316928, 'steps': 47493, 'batch_loss/train': 0.6573979163076729} +12/29/2021 05:09:31 - INFO - codeparrot_training - Step 47494: {'lr': 3.1143936208526402e-06, 'samples': 24317440, 'steps': 47494, 'batch_loss/train': 0.7542474246583879} +12/29/2021 05:09:43 - INFO - codeparrot_training - Step 47495: {'lr': 3.1119137355955507e-06, 'samples': 24317952, 'steps': 47495, 'batch_loss/train': 0.7248191959224641} +12/29/2021 05:09:54 - INFO - codeparrot_training - Step 47496: {'lr': 3.1094348318722322e-06, 'samples': 24318464, 'steps': 47496, 'batch_loss/train': 0.6599957786384039} +12/29/2021 05:10:05 - INFO - codeparrot_training - Step 47497: {'lr': 3.106956909692621e-06, 'samples': 24318976, 'steps': 47497, 'batch_loss/train': 1.0728646079078317} +12/29/2021 05:10:17 - INFO - codeparrot_training - Step 47498: {'lr': 3.104479969066487e-06, 'samples': 24319488, 'steps': 47498, 'batch_loss/train': 0.6726429772097617} +12/29/2021 05:10:27 - INFO - codeparrot_training - Step 47499: {'lr': 3.1020040100037393e-06, 'samples': 24320000, 'steps': 47499, 'batch_loss/train': 0.7271024992223829} +12/29/2021 05:10:27 - INFO - codeparrot_training - Evaluating and saving model checkpoint +12/29/2021 05:13:49 - INFO - codeparrot_training - Step 47500: {'loss/eval': 0.7404438257217407, 'perplexity': 2.0968658924102783}