diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1266 @@ +[2023-02-24 23:17:39,804][00517] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-24 23:17:39,811][00517] Rollout worker 0 uses device cpu +[2023-02-24 23:17:39,816][00517] Rollout worker 1 uses device cpu +[2023-02-24 23:17:39,818][00517] Rollout worker 2 uses device cpu +[2023-02-24 23:17:39,822][00517] Rollout worker 3 uses device cpu +[2023-02-24 23:17:39,827][00517] Rollout worker 4 uses device cpu +[2023-02-24 23:17:39,831][00517] Rollout worker 5 uses device cpu +[2023-02-24 23:17:39,832][00517] Rollout worker 6 uses device cpu +[2023-02-24 23:17:39,836][00517] Rollout worker 7 uses device cpu +[2023-02-24 23:17:40,463][00517] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 23:17:40,466][00517] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-24 23:17:40,579][00517] Starting all processes... +[2023-02-24 23:17:40,586][00517] Starting process learner_proc0 +[2023-02-24 23:17:40,802][00517] Starting all processes... +[2023-02-24 23:17:40,857][00517] Starting process inference_proc0-0 +[2023-02-24 23:17:40,858][00517] Starting process rollout_proc0 +[2023-02-24 23:17:40,868][00517] Starting process rollout_proc1 +[2023-02-24 23:17:40,887][00517] Starting process rollout_proc2 +[2023-02-24 23:17:40,935][00517] Starting process rollout_proc3 +[2023-02-24 23:17:40,942][00517] Starting process rollout_proc4 +[2023-02-24 23:17:40,942][00517] Starting process rollout_proc5 +[2023-02-24 23:17:40,942][00517] Starting process rollout_proc6 +[2023-02-24 23:17:40,942][00517] Starting process rollout_proc7 +[2023-02-24 23:17:52,727][12151] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 23:17:52,728][12151] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-24 23:17:52,858][12170] Worker 0 uses CPU cores [0] +[2023-02-24 23:17:52,886][12171] Worker 1 uses CPU cores [1] +[2023-02-24 23:17:53,263][12169] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 23:17:53,268][12169] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-24 23:17:53,398][12177] Worker 7 uses CPU cores [1] +[2023-02-24 23:17:53,744][12176] Worker 6 uses CPU cores [0] +[2023-02-24 23:17:53,742][12173] Worker 4 uses CPU cores [0] +[2023-02-24 23:17:53,984][12172] Worker 2 uses CPU cores [0] +[2023-02-24 23:17:53,989][12174] Worker 3 uses CPU cores [1] +[2023-02-24 23:17:54,009][12175] Worker 5 uses CPU cores [1] +[2023-02-24 23:17:54,097][12151] Num visible devices: 1 +[2023-02-24 23:17:54,097][12169] Num visible devices: 1 +[2023-02-24 23:17:54,138][12151] Starting seed is not provided +[2023-02-24 23:17:54,140][12151] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 23:17:54,141][12151] Initializing actor-critic model on device cuda:0 +[2023-02-24 23:17:54,142][12151] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 23:17:54,144][12151] RunningMeanStd input shape: (1,) +[2023-02-24 23:17:54,181][12151] ConvEncoder: input_channels=3 +[2023-02-24 23:17:54,718][12151] Conv encoder output size: 512 +[2023-02-24 23:17:54,719][12151] Policy head output size: 512 +[2023-02-24 23:17:54,775][12151] Created Actor Critic model with architecture: +[2023-02-24 23:17:54,775][12151] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-24 23:18:00,424][00517] Heartbeat connected on Batcher_0 +[2023-02-24 23:18:00,463][00517] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-24 23:18:00,485][00517] Heartbeat connected on RolloutWorker_w0 +[2023-02-24 23:18:00,499][00517] Heartbeat connected on RolloutWorker_w1 +[2023-02-24 23:18:00,520][00517] Heartbeat connected on RolloutWorker_w2 +[2023-02-24 23:18:00,542][00517] Heartbeat connected on RolloutWorker_w3 +[2023-02-24 23:18:00,557][00517] Heartbeat connected on RolloutWorker_w4 +[2023-02-24 23:18:00,563][00517] Heartbeat connected on RolloutWorker_w5 +[2023-02-24 23:18:00,573][00517] Heartbeat connected on RolloutWorker_w6 +[2023-02-24 23:18:00,578][00517] Heartbeat connected on RolloutWorker_w7 +[2023-02-24 23:18:02,285][12151] Using optimizer +[2023-02-24 23:18:02,287][12151] No checkpoints found +[2023-02-24 23:18:02,288][12151] Did not load from checkpoint, starting from scratch! +[2023-02-24 23:18:02,288][12151] Initialized policy 0 weights for model version 0 +[2023-02-24 23:18:02,291][12151] LearnerWorker_p0 finished initialization! +[2023-02-24 23:18:02,292][00517] Heartbeat connected on LearnerWorker_p0 +[2023-02-24 23:18:02,293][12151] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 23:18:02,399][12169] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 23:18:02,401][12169] RunningMeanStd input shape: (1,) +[2023-02-24 23:18:02,418][12169] ConvEncoder: input_channels=3 +[2023-02-24 23:18:02,521][12169] Conv encoder output size: 512 +[2023-02-24 23:18:02,521][12169] Policy head output size: 512 +[2023-02-24 23:18:02,944][00517] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 23:18:04,881][00517] Inference worker 0-0 is ready! +[2023-02-24 23:18:04,883][00517] All inference workers are ready! Signal rollout workers to start! +[2023-02-24 23:18:05,004][12171] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,018][12175] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,035][12177] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,044][12174] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,055][12172] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,050][12173] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,064][12170] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,067][12176] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:18:05,950][12172] Decorrelating experience for 0 frames... +[2023-02-24 23:18:05,953][12173] Decorrelating experience for 0 frames... +[2023-02-24 23:18:06,248][12174] Decorrelating experience for 0 frames... +[2023-02-24 23:18:06,251][12175] Decorrelating experience for 0 frames... +[2023-02-24 23:18:06,256][12171] Decorrelating experience for 0 frames... +[2023-02-24 23:18:07,280][12170] Decorrelating experience for 0 frames... +[2023-02-24 23:18:07,312][12175] Decorrelating experience for 32 frames... +[2023-02-24 23:18:07,314][12173] Decorrelating experience for 32 frames... +[2023-02-24 23:18:07,330][12171] Decorrelating experience for 32 frames... +[2023-02-24 23:18:07,337][12172] Decorrelating experience for 32 frames... +[2023-02-24 23:18:07,389][12177] Decorrelating experience for 0 frames... +[2023-02-24 23:18:07,795][12176] Decorrelating experience for 0 frames... +[2023-02-24 23:18:07,944][00517] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 23:18:08,126][12170] Decorrelating experience for 32 frames... +[2023-02-24 23:18:08,961][12174] Decorrelating experience for 32 frames... +[2023-02-24 23:18:09,025][12170] Decorrelating experience for 64 frames... +[2023-02-24 23:18:09,140][12177] Decorrelating experience for 32 frames... +[2023-02-24 23:18:09,451][12171] Decorrelating experience for 64 frames... +[2023-02-24 23:18:09,842][12175] Decorrelating experience for 64 frames... +[2023-02-24 23:18:10,519][12174] Decorrelating experience for 64 frames... +[2023-02-24 23:18:10,670][12170] Decorrelating experience for 96 frames... +[2023-02-24 23:18:10,822][12176] Decorrelating experience for 32 frames... +[2023-02-24 23:18:11,173][12175] Decorrelating experience for 96 frames... +[2023-02-24 23:18:11,585][12171] Decorrelating experience for 96 frames... +[2023-02-24 23:18:11,596][12173] Decorrelating experience for 64 frames... +[2023-02-24 23:18:12,590][12172] Decorrelating experience for 64 frames... +[2023-02-24 23:18:12,633][12177] Decorrelating experience for 64 frames... +[2023-02-24 23:18:12,639][12174] Decorrelating experience for 96 frames... +[2023-02-24 23:18:12,946][00517] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 23:18:13,454][12177] Decorrelating experience for 96 frames... +[2023-02-24 23:18:14,159][12176] Decorrelating experience for 64 frames... +[2023-02-24 23:18:14,413][12173] Decorrelating experience for 96 frames... +[2023-02-24 23:18:14,823][12172] Decorrelating experience for 96 frames... +[2023-02-24 23:18:15,414][12176] Decorrelating experience for 96 frames... +[2023-02-24 23:18:17,944][00517] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 51.5. Samples: 772. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 23:18:17,948][00517] Avg episode reward: [(0, '1.282')] +[2023-02-24 23:18:19,010][12151] Signal inference workers to stop experience collection... +[2023-02-24 23:18:19,062][12169] InferenceWorker_p0-w0: stopping experience collection +[2023-02-24 23:18:21,633][12151] Signal inference workers to resume experience collection... +[2023-02-24 23:18:21,634][12169] InferenceWorker_p0-w0: resuming experience collection +[2023-02-24 23:18:22,944][00517] Fps is (10 sec: 409.7, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 4096. Throughput: 0: 156.4. Samples: 3128. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-02-24 23:18:22,952][00517] Avg episode reward: [(0, '2.606')] +[2023-02-24 23:18:27,948][00517] Fps is (10 sec: 2456.8, 60 sec: 982.9, 300 sec: 982.9). Total num frames: 24576. Throughput: 0: 223.8. Samples: 5596. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0) +[2023-02-24 23:18:27,952][00517] Avg episode reward: [(0, '3.622')] +[2023-02-24 23:18:32,945][00517] Fps is (10 sec: 3276.7, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 36864. Throughput: 0: 319.7. Samples: 9592. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 23:18:32,955][00517] Avg episode reward: [(0, '3.768')] +[2023-02-24 23:18:33,423][12169] Updated weights for policy 0, policy_version 10 (0.0025) +[2023-02-24 23:18:37,944][00517] Fps is (10 sec: 2458.4, 60 sec: 1404.3, 300 sec: 1404.3). Total num frames: 49152. Throughput: 0: 382.7. Samples: 13394. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-24 23:18:37,952][00517] Avg episode reward: [(0, '4.225')] +[2023-02-24 23:18:42,945][00517] Fps is (10 sec: 3276.9, 60 sec: 1740.8, 300 sec: 1740.8). Total num frames: 69632. Throughput: 0: 410.1. Samples: 16404. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 23:18:42,947][00517] Avg episode reward: [(0, '4.239')] +[2023-02-24 23:18:45,205][12169] Updated weights for policy 0, policy_version 20 (0.0028) +[2023-02-24 23:18:47,944][00517] Fps is (10 sec: 4096.0, 60 sec: 2002.5, 300 sec: 2002.5). Total num frames: 90112. Throughput: 0: 498.5. Samples: 22432. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 23:18:47,947][00517] Avg episode reward: [(0, '4.251')] +[2023-02-24 23:18:52,945][00517] Fps is (10 sec: 2867.0, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 98304. Throughput: 0: 568.3. Samples: 25574. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-24 23:18:52,948][00517] Avg episode reward: [(0, '4.186')] +[2023-02-24 23:18:57,945][00517] Fps is (10 sec: 2047.9, 60 sec: 2010.8, 300 sec: 2010.8). Total num frames: 110592. Throughput: 0: 602.7. Samples: 27122. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:18:57,952][00517] Avg episode reward: [(0, '4.150')] +[2023-02-24 23:18:57,954][12151] Saving new best policy, reward=4.150! +[2023-02-24 23:19:02,944][00517] Fps is (10 sec: 2048.2, 60 sec: 1979.7, 300 sec: 1979.7). Total num frames: 118784. Throughput: 0: 652.8. Samples: 30150. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:19:02,947][00517] Avg episode reward: [(0, '4.342')] +[2023-02-24 23:19:02,966][12151] Saving new best policy, reward=4.342! +[2023-02-24 23:19:04,053][12169] Updated weights for policy 0, policy_version 30 (0.0024) +[2023-02-24 23:19:07,944][00517] Fps is (10 sec: 2457.7, 60 sec: 2252.8, 300 sec: 2079.5). Total num frames: 135168. Throughput: 0: 709.6. Samples: 35060. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:19:07,953][00517] Avg episode reward: [(0, '4.254')] +[2023-02-24 23:19:12,944][00517] Fps is (10 sec: 3686.4, 60 sec: 2594.2, 300 sec: 2223.5). Total num frames: 155648. Throughput: 0: 721.8. Samples: 38074. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:19:12,953][00517] Avg episode reward: [(0, '4.351')] +[2023-02-24 23:19:12,967][12151] Saving new best policy, reward=4.351! +[2023-02-24 23:19:14,454][12169] Updated weights for policy 0, policy_version 40 (0.0033) +[2023-02-24 23:19:17,944][00517] Fps is (10 sec: 3686.4, 60 sec: 2867.2, 300 sec: 2293.8). Total num frames: 172032. Throughput: 0: 740.9. Samples: 42930. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:19:17,950][00517] Avg episode reward: [(0, '4.451')] +[2023-02-24 23:19:17,954][12151] Saving new best policy, reward=4.451! +[2023-02-24 23:19:22,945][00517] Fps is (10 sec: 2457.4, 60 sec: 2935.4, 300 sec: 2252.8). Total num frames: 180224. Throughput: 0: 738.5. Samples: 46626. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:19:22,950][00517] Avg episode reward: [(0, '4.493')] +[2023-02-24 23:19:22,994][12151] Saving new best policy, reward=4.493! +[2023-02-24 23:19:27,944][00517] Fps is (10 sec: 2867.2, 60 sec: 2935.6, 300 sec: 2361.2). Total num frames: 200704. Throughput: 0: 719.3. Samples: 48774. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:19:27,948][00517] Avg episode reward: [(0, '4.569')] +[2023-02-24 23:19:27,951][12151] Saving new best policy, reward=4.569! +[2023-02-24 23:19:28,951][12169] Updated weights for policy 0, policy_version 50 (0.0024) +[2023-02-24 23:19:32,944][00517] Fps is (10 sec: 3686.8, 60 sec: 3003.8, 300 sec: 2412.1). Total num frames: 217088. Throughput: 0: 717.6. Samples: 54724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:19:32,946][00517] Avg episode reward: [(0, '4.438')] +[2023-02-24 23:19:32,960][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000053_217088.pth... +[2023-02-24 23:19:37,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 2457.6). Total num frames: 233472. Throughput: 0: 745.4. Samples: 59118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:19:37,954][00517] Avg episode reward: [(0, '4.342')] +[2023-02-24 23:19:42,414][12169] Updated weights for policy 0, policy_version 60 (0.0013) +[2023-02-24 23:19:42,944][00517] Fps is (10 sec: 2867.2, 60 sec: 2935.5, 300 sec: 2457.6). Total num frames: 245760. Throughput: 0: 753.1. Samples: 61012. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:19:42,951][00517] Avg episode reward: [(0, '4.586')] +[2023-02-24 23:19:42,956][12151] Saving new best policy, reward=4.586! +[2023-02-24 23:19:47,944][00517] Fps is (10 sec: 2457.6, 60 sec: 2798.9, 300 sec: 2457.6). Total num frames: 258048. Throughput: 0: 774.3. Samples: 64994. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:19:47,953][00517] Avg episode reward: [(0, '4.607')] +[2023-02-24 23:19:48,034][12151] Saving new best policy, reward=4.607! +[2023-02-24 23:19:52,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.8, 300 sec: 2532.1). Total num frames: 278528. Throughput: 0: 795.7. Samples: 70868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:19:52,947][00517] Avg episode reward: [(0, '4.660')] +[2023-02-24 23:19:52,963][12151] Saving new best policy, reward=4.660! +[2023-02-24 23:19:54,411][12169] Updated weights for policy 0, policy_version 70 (0.0014) +[2023-02-24 23:19:57,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3072.0, 300 sec: 2564.5). Total num frames: 294912. Throughput: 0: 793.7. Samples: 73790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:19:57,952][00517] Avg episode reward: [(0, '4.671')] +[2023-02-24 23:19:57,954][12151] Saving new best policy, reward=4.671! +[2023-02-24 23:20:02,945][00517] Fps is (10 sec: 2867.1, 60 sec: 3140.3, 300 sec: 2560.0). Total num frames: 307200. Throughput: 0: 769.7. Samples: 77566. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:20:02,947][00517] Avg episode reward: [(0, '4.569')] +[2023-02-24 23:20:07,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3140.3, 300 sec: 2588.7). Total num frames: 323584. Throughput: 0: 777.5. Samples: 81614. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:20:07,952][00517] Avg episode reward: [(0, '4.434')] +[2023-02-24 23:20:08,726][12169] Updated weights for policy 0, policy_version 80 (0.0014) +[2023-02-24 23:20:12,944][00517] Fps is (10 sec: 3686.5, 60 sec: 3140.3, 300 sec: 2646.6). Total num frames: 344064. Throughput: 0: 797.0. Samples: 84638. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:20:12,948][00517] Avg episode reward: [(0, '4.335')] +[2023-02-24 23:20:17,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3140.3, 300 sec: 2670.0). Total num frames: 360448. Throughput: 0: 800.7. Samples: 90754. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:20:17,949][00517] Avg episode reward: [(0, '4.311')] +[2023-02-24 23:20:19,955][12169] Updated weights for policy 0, policy_version 90 (0.0050) +[2023-02-24 23:20:22,968][00517] Fps is (10 sec: 2860.3, 60 sec: 3207.3, 300 sec: 2661.9). Total num frames: 372736. Throughput: 0: 791.3. Samples: 94744. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:20:22,977][00517] Avg episode reward: [(0, '4.418')] +[2023-02-24 23:20:27,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3140.3, 300 sec: 2683.6). Total num frames: 389120. Throughput: 0: 791.9. Samples: 96646. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:20:27,948][00517] Avg episode reward: [(0, '4.629')] +[2023-02-24 23:20:32,944][00517] Fps is (10 sec: 3284.7, 60 sec: 3140.3, 300 sec: 2703.4). Total num frames: 405504. Throughput: 0: 821.0. Samples: 101938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:20:32,948][00517] Avg episode reward: [(0, '4.857')] +[2023-02-24 23:20:32,960][12151] Saving new best policy, reward=4.857! +[2023-02-24 23:20:32,965][12169] Updated weights for policy 0, policy_version 100 (0.0020) +[2023-02-24 23:20:37,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 2748.3). Total num frames: 425984. Throughput: 0: 822.8. Samples: 107894. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:20:37,946][00517] Avg episode reward: [(0, '4.868')] +[2023-02-24 23:20:38,034][12151] Saving new best policy, reward=4.868! +[2023-02-24 23:20:42,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 2764.8). Total num frames: 442368. Throughput: 0: 800.6. Samples: 109816. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:20:42,951][00517] Avg episode reward: [(0, '4.781')] +[2023-02-24 23:20:46,175][12169] Updated weights for policy 0, policy_version 110 (0.0014) +[2023-02-24 23:20:47,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 2755.5). Total num frames: 454656. Throughput: 0: 803.0. Samples: 113700. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:20:47,951][00517] Avg episode reward: [(0, '4.753')] +[2023-02-24 23:20:52,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 2770.8). Total num frames: 471040. Throughput: 0: 827.6. Samples: 118854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:20:52,948][00517] Avg episode reward: [(0, '4.735')] +[2023-02-24 23:20:57,495][12169] Updated weights for policy 0, policy_version 120 (0.0019) +[2023-02-24 23:20:57,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 2808.7). Total num frames: 491520. Throughput: 0: 827.3. Samples: 121866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:20:57,948][00517] Avg episode reward: [(0, '4.582')] +[2023-02-24 23:21:02,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 2798.9). Total num frames: 503808. Throughput: 0: 800.1. Samples: 126760. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:02,960][00517] Avg episode reward: [(0, '4.494')] +[2023-02-24 23:21:07,946][00517] Fps is (10 sec: 2866.6, 60 sec: 3276.7, 300 sec: 2811.8). Total num frames: 520192. Throughput: 0: 796.7. Samples: 130580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:07,952][00517] Avg episode reward: [(0, '4.825')] +[2023-02-24 23:21:12,036][12169] Updated weights for policy 0, policy_version 130 (0.0037) +[2023-02-24 23:21:12,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3140.3, 300 sec: 2802.5). Total num frames: 532480. Throughput: 0: 800.0. Samples: 132644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:21:12,953][00517] Avg episode reward: [(0, '5.079')] +[2023-02-24 23:21:12,967][12151] Saving new best policy, reward=5.079! +[2023-02-24 23:21:17,945][00517] Fps is (10 sec: 3277.4, 60 sec: 3208.5, 300 sec: 2835.7). Total num frames: 552960. Throughput: 0: 807.4. Samples: 138270. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:17,948][00517] Avg episode reward: [(0, '5.386')] +[2023-02-24 23:21:17,958][12151] Saving new best policy, reward=5.386! +[2023-02-24 23:21:22,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3278.1, 300 sec: 2846.7). Total num frames: 569344. Throughput: 0: 783.6. Samples: 143154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:21:22,949][00517] Avg episode reward: [(0, '5.431')] +[2023-02-24 23:21:22,963][12151] Saving new best policy, reward=5.431! +[2023-02-24 23:21:23,999][12169] Updated weights for policy 0, policy_version 140 (0.0019) +[2023-02-24 23:21:27,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 2837.2). Total num frames: 581632. Throughput: 0: 782.1. Samples: 145010. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:21:27,952][00517] Avg episode reward: [(0, '5.276')] +[2023-02-24 23:21:32,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3140.3, 300 sec: 2828.2). Total num frames: 593920. Throughput: 0: 780.9. Samples: 148840. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:32,953][00517] Avg episode reward: [(0, '5.089')] +[2023-02-24 23:21:32,967][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000145_593920.pth... +[2023-02-24 23:21:37,240][12169] Updated weights for policy 0, policy_version 150 (0.0020) +[2023-02-24 23:21:37,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 2857.7). Total num frames: 614400. Throughput: 0: 799.4. Samples: 154826. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:37,952][00517] Avg episode reward: [(0, '4.896')] +[2023-02-24 23:21:42,945][00517] Fps is (10 sec: 4095.9, 60 sec: 3208.5, 300 sec: 2885.8). Total num frames: 634880. Throughput: 0: 799.2. Samples: 157832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:42,947][00517] Avg episode reward: [(0, '5.228')] +[2023-02-24 23:21:47,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 2876.3). Total num frames: 647168. Throughput: 0: 784.7. Samples: 162072. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:21:47,954][00517] Avg episode reward: [(0, '5.463')] +[2023-02-24 23:21:47,957][12151] Saving new best policy, reward=5.463! +[2023-02-24 23:21:50,691][12169] Updated weights for policy 0, policy_version 160 (0.0047) +[2023-02-24 23:21:52,945][00517] Fps is (10 sec: 2457.5, 60 sec: 3140.3, 300 sec: 2867.2). Total num frames: 659456. Throughput: 0: 783.6. Samples: 165842. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:21:52,948][00517] Avg episode reward: [(0, '5.816')] +[2023-02-24 23:21:52,961][12151] Saving new best policy, reward=5.816! +[2023-02-24 23:21:57,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3072.0, 300 sec: 2875.9). Total num frames: 675840. Throughput: 0: 796.3. Samples: 168476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:21:57,954][00517] Avg episode reward: [(0, '5.982')] +[2023-02-24 23:21:57,958][12151] Saving new best policy, reward=5.982! +[2023-02-24 23:22:02,947][00517] Fps is (10 sec: 2866.6, 60 sec: 3071.9, 300 sec: 2867.2). Total num frames: 688128. Throughput: 0: 760.4. Samples: 172490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:22:02,951][00517] Avg episode reward: [(0, '5.966')] +[2023-02-24 23:22:05,676][12169] Updated weights for policy 0, policy_version 170 (0.0031) +[2023-02-24 23:22:07,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3003.8, 300 sec: 2858.8). Total num frames: 700416. Throughput: 0: 722.2. Samples: 175652. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:22:07,954][00517] Avg episode reward: [(0, '5.988')] +[2023-02-24 23:22:07,961][12151] Saving new best policy, reward=5.988! +[2023-02-24 23:22:12,944][00517] Fps is (10 sec: 2048.5, 60 sec: 2935.5, 300 sec: 2834.4). Total num frames: 708608. Throughput: 0: 715.3. Samples: 177198. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:22:12,951][00517] Avg episode reward: [(0, '5.701')] +[2023-02-24 23:22:17,944][00517] Fps is (10 sec: 2457.6, 60 sec: 2867.2, 300 sec: 2843.1). Total num frames: 724992. Throughput: 0: 718.9. Samples: 181190. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:22:17,948][00517] Avg episode reward: [(0, '6.181')] +[2023-02-24 23:22:17,950][12151] Saving new best policy, reward=6.181! +[2023-02-24 23:22:20,646][12169] Updated weights for policy 0, policy_version 180 (0.0035) +[2023-02-24 23:22:22,944][00517] Fps is (10 sec: 3686.4, 60 sec: 2935.5, 300 sec: 2867.2). Total num frames: 745472. Throughput: 0: 717.6. Samples: 187120. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:22:22,947][00517] Avg episode reward: [(0, '5.963')] +[2023-02-24 23:22:27,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3003.7, 300 sec: 2874.9). Total num frames: 761856. Throughput: 0: 716.8. Samples: 190086. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:22:27,949][00517] Avg episode reward: [(0, '6.160')] +[2023-02-24 23:22:32,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3003.7, 300 sec: 2867.2). Total num frames: 774144. Throughput: 0: 713.8. Samples: 194194. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:22:32,950][00517] Avg episode reward: [(0, '6.156')] +[2023-02-24 23:22:33,649][12169] Updated weights for policy 0, policy_version 190 (0.0016) +[2023-02-24 23:22:37,944][00517] Fps is (10 sec: 2457.6, 60 sec: 2867.2, 300 sec: 2859.8). Total num frames: 786432. Throughput: 0: 718.1. Samples: 198156. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:22:37,950][00517] Avg episode reward: [(0, '6.636')] +[2023-02-24 23:22:37,957][12151] Saving new best policy, reward=6.636! +[2023-02-24 23:22:42,944][00517] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 2881.8). Total num frames: 806912. Throughput: 0: 725.7. Samples: 201132. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:22:42,947][00517] Avg episode reward: [(0, '6.811')] +[2023-02-24 23:22:43,017][12151] Saving new best policy, reward=6.811! +[2023-02-24 23:22:45,132][12169] Updated weights for policy 0, policy_version 200 (0.0022) +[2023-02-24 23:22:47,947][00517] Fps is (10 sec: 4095.0, 60 sec: 3003.6, 300 sec: 2903.1). Total num frames: 827392. Throughput: 0: 769.7. Samples: 207128. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:22:47,950][00517] Avg episode reward: [(0, '7.892')] +[2023-02-24 23:22:47,957][12151] Saving new best policy, reward=7.892! +[2023-02-24 23:22:52,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 2895.4). Total num frames: 839680. Throughput: 0: 787.4. Samples: 211086. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:22:52,952][00517] Avg episode reward: [(0, '8.450')] +[2023-02-24 23:22:52,965][12151] Saving new best policy, reward=8.450! +[2023-02-24 23:22:57,945][00517] Fps is (10 sec: 2458.1, 60 sec: 2935.5, 300 sec: 2888.0). Total num frames: 851968. Throughput: 0: 794.3. Samples: 212942. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:22:57,953][00517] Avg episode reward: [(0, '8.507')] +[2023-02-24 23:22:57,957][12151] Saving new best policy, reward=8.507! +[2023-02-24 23:22:59,316][12169] Updated weights for policy 0, policy_version 210 (0.0021) +[2023-02-24 23:23:02,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3072.1, 300 sec: 2957.5). Total num frames: 872448. Throughput: 0: 819.1. Samples: 218050. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:23:02,947][00517] Avg episode reward: [(0, '8.784')] +[2023-02-24 23:23:02,967][12151] Saving new best policy, reward=8.784! +[2023-02-24 23:23:07,944][00517] Fps is (10 sec: 4096.1, 60 sec: 3208.5, 300 sec: 3026.9). Total num frames: 892928. Throughput: 0: 822.4. Samples: 224126. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:23:07,950][00517] Avg episode reward: [(0, '8.084')] +[2023-02-24 23:23:10,581][12169] Updated weights for policy 0, policy_version 220 (0.0020) +[2023-02-24 23:23:12,945][00517] Fps is (10 sec: 3276.7, 60 sec: 3276.8, 300 sec: 3068.5). Total num frames: 905216. Throughput: 0: 802.6. Samples: 226204. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:23:12,950][00517] Avg episode reward: [(0, '8.228')] +[2023-02-24 23:23:17,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3096.3). Total num frames: 917504. Throughput: 0: 794.8. Samples: 229962. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:23:17,947][00517] Avg episode reward: [(0, '7.906')] +[2023-02-24 23:23:22,944][00517] Fps is (10 sec: 2867.3, 60 sec: 3140.3, 300 sec: 3082.4). Total num frames: 933888. Throughput: 0: 817.9. Samples: 234960. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:23:22,947][00517] Avg episode reward: [(0, '8.092')] +[2023-02-24 23:23:24,180][12169] Updated weights for policy 0, policy_version 230 (0.0022) +[2023-02-24 23:23:27,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3110.2). Total num frames: 954368. Throughput: 0: 820.5. Samples: 238056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:23:27,950][00517] Avg episode reward: [(0, '8.611')] +[2023-02-24 23:23:32,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3124.1). Total num frames: 970752. Throughput: 0: 802.5. Samples: 243238. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:23:32,951][00517] Avg episode reward: [(0, '8.892')] +[2023-02-24 23:23:32,967][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000237_970752.pth... +[2023-02-24 23:23:33,144][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000053_217088.pth +[2023-02-24 23:23:33,160][12151] Saving new best policy, reward=8.892! +[2023-02-24 23:23:36,907][12169] Updated weights for policy 0, policy_version 240 (0.0022) +[2023-02-24 23:23:37,945][00517] Fps is (10 sec: 2867.0, 60 sec: 3276.8, 300 sec: 3096.3). Total num frames: 983040. Throughput: 0: 796.7. Samples: 246936. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:23:37,948][00517] Avg episode reward: [(0, '8.276')] +[2023-02-24 23:23:42,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3082.4). Total num frames: 999424. Throughput: 0: 800.7. Samples: 248972. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:23:42,947][00517] Avg episode reward: [(0, '7.651')] +[2023-02-24 23:23:47,944][00517] Fps is (10 sec: 3686.6, 60 sec: 3208.7, 300 sec: 3124.1). Total num frames: 1019904. Throughput: 0: 823.6. Samples: 255112. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:23:47,951][00517] Avg episode reward: [(0, '7.416')] +[2023-02-24 23:23:48,348][12169] Updated weights for policy 0, policy_version 250 (0.0026) +[2023-02-24 23:23:52,945][00517] Fps is (10 sec: 3686.1, 60 sec: 3276.8, 300 sec: 3137.9). Total num frames: 1036288. Throughput: 0: 806.4. Samples: 260414. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:23:52,949][00517] Avg episode reward: [(0, '7.605')] +[2023-02-24 23:23:57,949][00517] Fps is (10 sec: 2866.0, 60 sec: 3276.6, 300 sec: 3151.8). Total num frames: 1048576. Throughput: 0: 802.3. Samples: 262312. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:23:57,952][00517] Avg episode reward: [(0, '8.308')] +[2023-02-24 23:24:02,697][12169] Updated weights for policy 0, policy_version 260 (0.0026) +[2023-02-24 23:24:02,945][00517] Fps is (10 sec: 2867.4, 60 sec: 3208.5, 300 sec: 3151.8). Total num frames: 1064960. Throughput: 0: 805.5. Samples: 266210. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:24:02,952][00517] Avg episode reward: [(0, '9.277')] +[2023-02-24 23:24:02,967][12151] Saving new best policy, reward=9.277! +[2023-02-24 23:24:07,944][00517] Fps is (10 sec: 3688.0, 60 sec: 3208.5, 300 sec: 3151.8). Total num frames: 1085440. Throughput: 0: 826.1. Samples: 272136. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-24 23:24:07,947][00517] Avg episode reward: [(0, '10.469')] +[2023-02-24 23:24:07,950][12151] Saving new best policy, reward=10.469! +[2023-02-24 23:24:12,945][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3151.8). Total num frames: 1101824. Throughput: 0: 823.4. Samples: 275110. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:24:12,947][00517] Avg episode reward: [(0, '11.636')] +[2023-02-24 23:24:12,962][12151] Saving new best policy, reward=11.636! +[2023-02-24 23:24:13,599][12169] Updated weights for policy 0, policy_version 270 (0.0013) +[2023-02-24 23:24:17,947][00517] Fps is (10 sec: 2866.5, 60 sec: 3276.7, 300 sec: 3165.7). Total num frames: 1114112. Throughput: 0: 802.0. Samples: 279328. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:24:17,951][00517] Avg episode reward: [(0, '11.525')] +[2023-02-24 23:24:22,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3138.0). Total num frames: 1126400. Throughput: 0: 806.2. Samples: 283216. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:24:22,948][00517] Avg episode reward: [(0, '11.213')] +[2023-02-24 23:24:27,068][12169] Updated weights for policy 0, policy_version 280 (0.0019) +[2023-02-24 23:24:27,944][00517] Fps is (10 sec: 3277.6, 60 sec: 3208.5, 300 sec: 3151.8). Total num frames: 1146880. Throughput: 0: 826.2. Samples: 286150. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:24:27,947][00517] Avg episode reward: [(0, '9.956')] +[2023-02-24 23:24:32,944][00517] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3165.7). Total num frames: 1167360. Throughput: 0: 825.1. Samples: 292240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:24:32,948][00517] Avg episode reward: [(0, '10.388')] +[2023-02-24 23:24:37,950][00517] Fps is (10 sec: 3684.5, 60 sec: 3344.8, 300 sec: 3179.6). Total num frames: 1183744. Throughput: 0: 803.7. Samples: 296584. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:24:37,960][00517] Avg episode reward: [(0, '10.410')] +[2023-02-24 23:24:39,494][12169] Updated weights for policy 0, policy_version 290 (0.0021) +[2023-02-24 23:24:42,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 1196032. Throughput: 0: 804.6. Samples: 298516. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:24:42,948][00517] Avg episode reward: [(0, '10.322')] +[2023-02-24 23:24:47,944][00517] Fps is (10 sec: 2868.7, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 1212416. Throughput: 0: 823.6. Samples: 303274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:24:47,947][00517] Avg episode reward: [(0, '11.317')] +[2023-02-24 23:24:51,501][12169] Updated weights for policy 0, policy_version 300 (0.0021) +[2023-02-24 23:24:52,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 1232896. Throughput: 0: 822.3. Samples: 309140. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:24:52,952][00517] Avg episode reward: [(0, '9.791')] +[2023-02-24 23:24:57,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3345.3, 300 sec: 3193.5). Total num frames: 1249280. Throughput: 0: 812.4. Samples: 311666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:24:57,946][00517] Avg episode reward: [(0, '10.058')] +[2023-02-24 23:25:02,946][00517] Fps is (10 sec: 2866.8, 60 sec: 3276.7, 300 sec: 3179.6). Total num frames: 1261568. Throughput: 0: 803.7. Samples: 315492. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:25:02,949][00517] Avg episode reward: [(0, '9.939')] +[2023-02-24 23:25:06,324][12169] Updated weights for policy 0, policy_version 310 (0.0026) +[2023-02-24 23:25:07,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3140.3, 300 sec: 3151.8). Total num frames: 1273856. Throughput: 0: 798.3. Samples: 319140. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:25:07,949][00517] Avg episode reward: [(0, '10.509')] +[2023-02-24 23:25:12,944][00517] Fps is (10 sec: 2457.9, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 1286144. Throughput: 0: 774.4. Samples: 320998. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:25:12,949][00517] Avg episode reward: [(0, '10.734')] +[2023-02-24 23:25:17,947][00517] Fps is (10 sec: 2457.1, 60 sec: 3072.0, 300 sec: 3138.2). Total num frames: 1298432. Throughput: 0: 722.9. Samples: 324772. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:25:17,950][00517] Avg episode reward: [(0, '11.327')] +[2023-02-24 23:25:22,547][12169] Updated weights for policy 0, policy_version 320 (0.0015) +[2023-02-24 23:25:22,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3072.0, 300 sec: 3124.1). Total num frames: 1310720. Throughput: 0: 709.9. Samples: 328526. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:25:22,952][00517] Avg episode reward: [(0, '10.984')] +[2023-02-24 23:25:27,947][00517] Fps is (10 sec: 2457.4, 60 sec: 2935.3, 300 sec: 3110.2). Total num frames: 1323008. Throughput: 0: 708.8. Samples: 330414. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:25:27,956][00517] Avg episode reward: [(0, '11.268')] +[2023-02-24 23:25:32,945][00517] Fps is (10 sec: 3276.8, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 1343488. Throughput: 0: 727.0. Samples: 335990. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:25:32,947][00517] Avg episode reward: [(0, '10.559')] +[2023-02-24 23:25:32,970][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000328_1343488.pth... +[2023-02-24 23:25:33,132][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000145_593920.pth +[2023-02-24 23:25:34,450][12169] Updated weights for policy 0, policy_version 330 (0.0017) +[2023-02-24 23:25:37,944][00517] Fps is (10 sec: 4097.3, 60 sec: 3004.0, 300 sec: 3124.1). Total num frames: 1363968. Throughput: 0: 724.8. Samples: 341758. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:25:37,947][00517] Avg episode reward: [(0, '10.393')] +[2023-02-24 23:25:42,949][00517] Fps is (10 sec: 3275.4, 60 sec: 3003.5, 300 sec: 3124.0). Total num frames: 1376256. Throughput: 0: 710.1. Samples: 343624. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:25:42,955][00517] Avg episode reward: [(0, '9.694')] +[2023-02-24 23:25:47,944][00517] Fps is (10 sec: 2457.6, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 1388544. Throughput: 0: 710.3. Samples: 347454. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:25:47,952][00517] Avg episode reward: [(0, '10.397')] +[2023-02-24 23:25:48,801][12169] Updated weights for policy 0, policy_version 340 (0.0016) +[2023-02-24 23:25:52,944][00517] Fps is (10 sec: 3278.2, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 1409024. Throughput: 0: 750.3. Samples: 352904. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:25:52,948][00517] Avg episode reward: [(0, '10.574')] +[2023-02-24 23:25:57,944][00517] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 3138.0). Total num frames: 1429504. Throughput: 0: 776.0. Samples: 355920. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:25:57,947][00517] Avg episode reward: [(0, '11.590')] +[2023-02-24 23:25:58,970][12169] Updated weights for policy 0, policy_version 350 (0.0025) +[2023-02-24 23:26:02,945][00517] Fps is (10 sec: 3276.7, 60 sec: 3003.8, 300 sec: 3124.1). Total num frames: 1441792. Throughput: 0: 799.1. Samples: 360732. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 23:26:02,948][00517] Avg episode reward: [(0, '12.182')] +[2023-02-24 23:26:02,966][12151] Saving new best policy, reward=12.182! +[2023-02-24 23:26:07,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 1454080. Throughput: 0: 800.0. Samples: 364524. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:26:07,950][00517] Avg episode reward: [(0, '12.142')] +[2023-02-24 23:26:12,944][00517] Fps is (10 sec: 2867.3, 60 sec: 3072.0, 300 sec: 3110.2). Total num frames: 1470464. Throughput: 0: 811.6. Samples: 366932. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:26:12,954][00517] Avg episode reward: [(0, '12.164')] +[2023-02-24 23:26:13,339][12169] Updated weights for policy 0, policy_version 360 (0.0027) +[2023-02-24 23:26:17,945][00517] Fps is (10 sec: 3686.3, 60 sec: 3208.6, 300 sec: 3124.1). Total num frames: 1490944. Throughput: 0: 814.7. Samples: 372650. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:26:17,950][00517] Avg episode reward: [(0, '13.687')] +[2023-02-24 23:26:17,956][12151] Saving new best policy, reward=13.687! +[2023-02-24 23:26:22,947][00517] Fps is (10 sec: 3276.0, 60 sec: 3208.4, 300 sec: 3124.0). Total num frames: 1503232. Throughput: 0: 788.0. Samples: 377218. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:26:22,953][00517] Avg episode reward: [(0, '13.805')] +[2023-02-24 23:26:22,968][12151] Saving new best policy, reward=13.805! +[2023-02-24 23:26:26,738][12169] Updated weights for policy 0, policy_version 370 (0.0020) +[2023-02-24 23:26:27,951][00517] Fps is (10 sec: 2456.1, 60 sec: 3208.4, 300 sec: 3124.0). Total num frames: 1515520. Throughput: 0: 785.7. Samples: 378984. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:26:27,955][00517] Avg episode reward: [(0, '14.065')] +[2023-02-24 23:26:27,960][12151] Saving new best policy, reward=14.065! +[2023-02-24 23:26:32,944][00517] Fps is (10 sec: 2867.9, 60 sec: 3140.3, 300 sec: 3110.2). Total num frames: 1531904. Throughput: 0: 795.2. Samples: 383240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:26:32,950][00517] Avg episode reward: [(0, '15.206')] +[2023-02-24 23:26:32,964][12151] Saving new best policy, reward=15.206! +[2023-02-24 23:26:37,946][00517] Fps is (10 sec: 3688.2, 60 sec: 3140.2, 300 sec: 3110.2). Total num frames: 1552384. Throughput: 0: 808.4. Samples: 389282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:26:37,952][00517] Avg episode reward: [(0, '16.652')] +[2023-02-24 23:26:37,954][12151] Saving new best policy, reward=16.652! +[2023-02-24 23:26:38,462][12169] Updated weights for policy 0, policy_version 380 (0.0019) +[2023-02-24 23:26:42,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.8, 300 sec: 3124.1). Total num frames: 1568768. Throughput: 0: 807.2. Samples: 392242. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:26:42,949][00517] Avg episode reward: [(0, '16.220')] +[2023-02-24 23:26:47,944][00517] Fps is (10 sec: 2867.6, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 1581056. Throughput: 0: 783.6. Samples: 395994. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:26:47,951][00517] Avg episode reward: [(0, '16.890')] +[2023-02-24 23:26:47,954][12151] Saving new best policy, reward=16.890! +[2023-02-24 23:26:52,885][12169] Updated weights for policy 0, policy_version 390 (0.0017) +[2023-02-24 23:26:52,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3140.3, 300 sec: 3124.1). Total num frames: 1597440. Throughput: 0: 788.0. Samples: 399984. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:26:52,947][00517] Avg episode reward: [(0, '17.348')] +[2023-02-24 23:26:52,958][12151] Saving new best policy, reward=17.348! +[2023-02-24 23:26:57,945][00517] Fps is (10 sec: 3276.7, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 1613824. Throughput: 0: 797.6. Samples: 402826. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:26:57,948][00517] Avg episode reward: [(0, '17.624')] +[2023-02-24 23:26:57,956][12151] Saving new best policy, reward=17.624! +[2023-02-24 23:27:02,947][00517] Fps is (10 sec: 3685.5, 60 sec: 3208.4, 300 sec: 3165.7). Total num frames: 1634304. Throughput: 0: 796.8. Samples: 408510. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:27:02,952][00517] Avg episode reward: [(0, '16.280')] +[2023-02-24 23:27:04,318][12169] Updated weights for policy 0, policy_version 400 (0.0015) +[2023-02-24 23:27:07,945][00517] Fps is (10 sec: 3276.7, 60 sec: 3208.5, 300 sec: 3179.6). Total num frames: 1646592. Throughput: 0: 778.9. Samples: 412268. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:27:07,955][00517] Avg episode reward: [(0, '17.312')] +[2023-02-24 23:27:12,944][00517] Fps is (10 sec: 2458.2, 60 sec: 3140.3, 300 sec: 3165.7). Total num frames: 1658880. Throughput: 0: 782.6. Samples: 414198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:27:12,946][00517] Avg episode reward: [(0, '17.716')] +[2023-02-24 23:27:12,956][12151] Saving new best policy, reward=17.716! +[2023-02-24 23:27:17,943][12169] Updated weights for policy 0, policy_version 410 (0.0015) +[2023-02-24 23:27:17,950][00517] Fps is (10 sec: 3275.2, 60 sec: 3140.0, 300 sec: 3165.7). Total num frames: 1679360. Throughput: 0: 802.3. Samples: 419350. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:27:17,958][00517] Avg episode reward: [(0, '19.053')] +[2023-02-24 23:27:17,965][12151] Saving new best policy, reward=19.053! +[2023-02-24 23:27:22,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.7, 300 sec: 3165.7). Total num frames: 1695744. Throughput: 0: 803.0. Samples: 425416. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:27:22,948][00517] Avg episode reward: [(0, '18.355')] +[2023-02-24 23:27:27,946][00517] Fps is (10 sec: 3278.2, 60 sec: 3277.1, 300 sec: 3179.6). Total num frames: 1712128. Throughput: 0: 781.8. Samples: 427422. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:27:27,948][00517] Avg episode reward: [(0, '18.061')] +[2023-02-24 23:27:30,864][12169] Updated weights for policy 0, policy_version 420 (0.0019) +[2023-02-24 23:27:32,945][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3179.6). Total num frames: 1724416. Throughput: 0: 784.9. Samples: 431316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:27:32,948][00517] Avg episode reward: [(0, '17.104')] +[2023-02-24 23:27:32,974][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000421_1724416.pth... +[2023-02-24 23:27:33,167][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000237_970752.pth +[2023-02-24 23:27:37,945][00517] Fps is (10 sec: 2867.4, 60 sec: 3140.3, 300 sec: 3165.7). Total num frames: 1740800. Throughput: 0: 808.0. Samples: 436344. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:27:37,947][00517] Avg episode reward: [(0, '15.908')] +[2023-02-24 23:27:42,315][12169] Updated weights for policy 0, policy_version 430 (0.0023) +[2023-02-24 23:27:42,944][00517] Fps is (10 sec: 3686.5, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 1761280. Throughput: 0: 811.5. Samples: 439342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:27:42,950][00517] Avg episode reward: [(0, '14.480')] +[2023-02-24 23:27:47,951][00517] Fps is (10 sec: 3684.2, 60 sec: 3276.5, 300 sec: 3179.5). Total num frames: 1777664. Throughput: 0: 802.6. Samples: 444628. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:27:47,954][00517] Avg episode reward: [(0, '14.634')] +[2023-02-24 23:27:52,945][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3179.6). Total num frames: 1789952. Throughput: 0: 803.6. Samples: 448430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:27:52,949][00517] Avg episode reward: [(0, '15.235')] +[2023-02-24 23:27:56,505][12169] Updated weights for policy 0, policy_version 440 (0.0031) +[2023-02-24 23:27:57,944][00517] Fps is (10 sec: 2869.0, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 1806336. Throughput: 0: 806.0. Samples: 450468. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:27:57,947][00517] Avg episode reward: [(0, '16.196')] +[2023-02-24 23:28:02,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.7, 300 sec: 3165.7). Total num frames: 1826816. Throughput: 0: 827.4. Samples: 456580. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:28:02,947][00517] Avg episode reward: [(0, '16.679')] +[2023-02-24 23:28:07,131][12169] Updated weights for policy 0, policy_version 450 (0.0024) +[2023-02-24 23:28:07,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 1843200. Throughput: 0: 809.2. Samples: 461830. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:28:07,952][00517] Avg episode reward: [(0, '17.394')] +[2023-02-24 23:28:12,948][00517] Fps is (10 sec: 2866.3, 60 sec: 3276.6, 300 sec: 3179.6). Total num frames: 1855488. Throughput: 0: 806.2. Samples: 463704. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:28:12,951][00517] Avg episode reward: [(0, '17.531')] +[2023-02-24 23:28:17,950][00517] Fps is (10 sec: 2046.9, 60 sec: 3072.0, 300 sec: 3151.8). Total num frames: 1863680. Throughput: 0: 786.7. Samples: 466720. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:28:17,953][00517] Avg episode reward: [(0, '17.008')] +[2023-02-24 23:28:22,945][00517] Fps is (10 sec: 2048.6, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 1875968. Throughput: 0: 753.3. Samples: 470244. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:28:22,947][00517] Avg episode reward: [(0, '16.258')] +[2023-02-24 23:28:24,965][12169] Updated weights for policy 0, policy_version 460 (0.0042) +[2023-02-24 23:28:27,944][00517] Fps is (10 sec: 3278.5, 60 sec: 3072.1, 300 sec: 3138.0). Total num frames: 1896448. Throughput: 0: 739.5. Samples: 472620. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:28:27,948][00517] Avg episode reward: [(0, '15.617')] +[2023-02-24 23:28:32,949][00517] Fps is (10 sec: 3275.5, 60 sec: 3071.8, 300 sec: 3137.9). Total num frames: 1908736. Throughput: 0: 735.9. Samples: 477740. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:28:32,952][00517] Avg episode reward: [(0, '15.714')] +[2023-02-24 23:28:37,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3003.8, 300 sec: 3124.1). Total num frames: 1921024. Throughput: 0: 737.7. Samples: 481626. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:28:37,949][00517] Avg episode reward: [(0, '15.591')] +[2023-02-24 23:28:38,233][12169] Updated weights for policy 0, policy_version 470 (0.0022) +[2023-02-24 23:28:42,945][00517] Fps is (10 sec: 2868.3, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 1937408. Throughput: 0: 738.0. Samples: 483680. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:28:42,951][00517] Avg episode reward: [(0, '16.277')] +[2023-02-24 23:28:47,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3004.0, 300 sec: 3124.1). Total num frames: 1957888. Throughput: 0: 738.8. Samples: 489826. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:28:47,947][00517] Avg episode reward: [(0, '16.859')] +[2023-02-24 23:28:49,254][12169] Updated weights for policy 0, policy_version 480 (0.0016) +[2023-02-24 23:28:52,945][00517] Fps is (10 sec: 3686.5, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 1974272. Throughput: 0: 734.9. Samples: 494900. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:28:52,956][00517] Avg episode reward: [(0, '17.503')] +[2023-02-24 23:28:57,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 1990656. Throughput: 0: 735.8. Samples: 496812. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 23:28:57,956][00517] Avg episode reward: [(0, '19.003')] +[2023-02-24 23:29:02,944][00517] Fps is (10 sec: 2867.2, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 2002944. Throughput: 0: 755.0. Samples: 500692. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 23:29:02,954][00517] Avg episode reward: [(0, '18.687')] +[2023-02-24 23:29:03,529][12169] Updated weights for policy 0, policy_version 490 (0.0026) +[2023-02-24 23:29:07,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 2023424. Throughput: 0: 812.5. Samples: 506808. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:29:07,951][00517] Avg episode reward: [(0, '19.355')] +[2023-02-24 23:29:07,953][12151] Saving new best policy, reward=19.355! +[2023-02-24 23:29:12,950][00517] Fps is (10 sec: 3684.4, 60 sec: 3071.9, 300 sec: 3137.9). Total num frames: 2039808. Throughput: 0: 825.7. Samples: 509780. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:29:12,953][00517] Avg episode reward: [(0, '18.967')] +[2023-02-24 23:29:14,682][12169] Updated weights for policy 0, policy_version 500 (0.0018) +[2023-02-24 23:29:17,946][00517] Fps is (10 sec: 2866.6, 60 sec: 3140.4, 300 sec: 3137.9). Total num frames: 2052096. Throughput: 0: 802.7. Samples: 513858. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:29:17,951][00517] Avg episode reward: [(0, '18.963')] +[2023-02-24 23:29:22,944][00517] Fps is (10 sec: 2868.7, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 2068480. Throughput: 0: 802.7. Samples: 517746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:29:22,948][00517] Avg episode reward: [(0, '18.136')] +[2023-02-24 23:29:27,870][12169] Updated weights for policy 0, policy_version 510 (0.0017) +[2023-02-24 23:29:27,944][00517] Fps is (10 sec: 3687.2, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 2088960. Throughput: 0: 824.7. Samples: 520790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:29:27,950][00517] Avg episode reward: [(0, '19.845')] +[2023-02-24 23:29:27,952][12151] Saving new best policy, reward=19.845! +[2023-02-24 23:29:32,946][00517] Fps is (10 sec: 3685.9, 60 sec: 3277.0, 300 sec: 3124.1). Total num frames: 2105344. Throughput: 0: 819.5. Samples: 526706. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:29:32,951][00517] Avg episode reward: [(0, '19.446')] +[2023-02-24 23:29:32,971][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000514_2105344.pth... +[2023-02-24 23:29:33,162][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000328_1343488.pth +[2023-02-24 23:29:37,945][00517] Fps is (10 sec: 2867.1, 60 sec: 3276.8, 300 sec: 3124.1). Total num frames: 2117632. Throughput: 0: 795.2. Samples: 530686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:29:37,947][00517] Avg episode reward: [(0, '19.733')] +[2023-02-24 23:29:41,628][12169] Updated weights for policy 0, policy_version 520 (0.0021) +[2023-02-24 23:29:42,944][00517] Fps is (10 sec: 2457.9, 60 sec: 3208.5, 300 sec: 3110.2). Total num frames: 2129920. Throughput: 0: 794.3. Samples: 532554. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:29:42,949][00517] Avg episode reward: [(0, '19.022')] +[2023-02-24 23:29:47,945][00517] Fps is (10 sec: 3276.9, 60 sec: 3208.5, 300 sec: 3110.2). Total num frames: 2150400. Throughput: 0: 816.0. Samples: 537410. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:29:47,953][00517] Avg episode reward: [(0, '19.978')] +[2023-02-24 23:29:47,961][12151] Saving new best policy, reward=19.978! +[2023-02-24 23:29:52,843][12169] Updated weights for policy 0, policy_version 530 (0.0023) +[2023-02-24 23:29:52,944][00517] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3124.1). Total num frames: 2170880. Throughput: 0: 813.8. Samples: 543430. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:29:52,952][00517] Avg episode reward: [(0, '20.762')] +[2023-02-24 23:29:52,975][12151] Saving new best policy, reward=20.762! +[2023-02-24 23:29:57,959][00517] Fps is (10 sec: 3272.2, 60 sec: 3207.8, 300 sec: 3123.9). Total num frames: 2183168. Throughput: 0: 795.1. Samples: 545568. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:29:57,966][00517] Avg episode reward: [(0, '19.833')] +[2023-02-24 23:30:02,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 2195456. Throughput: 0: 788.1. Samples: 549320. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:30:02,950][00517] Avg episode reward: [(0, '19.925')] +[2023-02-24 23:30:07,311][12169] Updated weights for policy 0, policy_version 540 (0.0029) +[2023-02-24 23:30:07,944][00517] Fps is (10 sec: 2871.3, 60 sec: 3140.3, 300 sec: 3138.0). Total num frames: 2211840. Throughput: 0: 811.9. Samples: 554282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:30:07,947][00517] Avg episode reward: [(0, '20.876')] +[2023-02-24 23:30:07,950][12151] Saving new best policy, reward=20.876! +[2023-02-24 23:30:12,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.8, 300 sec: 3165.7). Total num frames: 2232320. Throughput: 0: 808.2. Samples: 557158. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:30:12,946][00517] Avg episode reward: [(0, '20.829')] +[2023-02-24 23:30:17,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.9, 300 sec: 3179.6). Total num frames: 2248704. Throughput: 0: 790.4. Samples: 562274. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:30:17,952][00517] Avg episode reward: [(0, '22.405')] +[2023-02-24 23:30:17,955][12151] Saving new best policy, reward=22.405! +[2023-02-24 23:30:19,578][12169] Updated weights for policy 0, policy_version 550 (0.0024) +[2023-02-24 23:30:22,945][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3179.6). Total num frames: 2260992. Throughput: 0: 783.7. Samples: 565954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:30:22,951][00517] Avg episode reward: [(0, '21.954')] +[2023-02-24 23:30:27,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3072.0, 300 sec: 3151.8). Total num frames: 2273280. Throughput: 0: 784.5. Samples: 567858. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:30:27,946][00517] Avg episode reward: [(0, '21.014')] +[2023-02-24 23:30:32,260][12169] Updated weights for policy 0, policy_version 560 (0.0018) +[2023-02-24 23:30:32,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 3151.8). Total num frames: 2293760. Throughput: 0: 806.4. Samples: 573700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:30:32,948][00517] Avg episode reward: [(0, '21.100')] +[2023-02-24 23:30:37,945][00517] Fps is (10 sec: 3686.3, 60 sec: 3208.5, 300 sec: 3165.8). Total num frames: 2310144. Throughput: 0: 789.1. Samples: 578940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:30:37,951][00517] Avg episode reward: [(0, '21.104')] +[2023-02-24 23:30:42,947][00517] Fps is (10 sec: 2866.4, 60 sec: 3208.4, 300 sec: 3165.7). Total num frames: 2322432. Throughput: 0: 784.5. Samples: 580860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:30:42,953][00517] Avg episode reward: [(0, '21.260')] +[2023-02-24 23:30:46,616][12169] Updated weights for policy 0, policy_version 570 (0.0037) +[2023-02-24 23:30:47,945][00517] Fps is (10 sec: 2867.2, 60 sec: 3140.3, 300 sec: 3151.8). Total num frames: 2338816. Throughput: 0: 782.0. Samples: 584512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:30:47,948][00517] Avg episode reward: [(0, '21.502')] +[2023-02-24 23:30:52,944][00517] Fps is (10 sec: 3277.8, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 2355200. Throughput: 0: 805.2. Samples: 590516. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:30:52,950][00517] Avg episode reward: [(0, '22.051')] +[2023-02-24 23:30:56,968][12169] Updated weights for policy 0, policy_version 580 (0.0013) +[2023-02-24 23:30:57,949][00517] Fps is (10 sec: 3685.0, 60 sec: 3209.1, 300 sec: 3165.7). Total num frames: 2375680. Throughput: 0: 807.7. Samples: 593508. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-24 23:30:57,957][00517] Avg episode reward: [(0, '22.637')] +[2023-02-24 23:30:57,961][12151] Saving new best policy, reward=22.637! +[2023-02-24 23:31:02,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 2387968. Throughput: 0: 785.3. Samples: 597614. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:31:02,950][00517] Avg episode reward: [(0, '22.222')] +[2023-02-24 23:31:07,946][00517] Fps is (10 sec: 2458.4, 60 sec: 3140.2, 300 sec: 3151.8). Total num frames: 2400256. Throughput: 0: 787.8. Samples: 601406. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:31:07,950][00517] Avg episode reward: [(0, '22.860')] +[2023-02-24 23:31:07,962][12151] Saving new best policy, reward=22.860! +[2023-02-24 23:31:11,546][12169] Updated weights for policy 0, policy_version 590 (0.0017) +[2023-02-24 23:31:12,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 3151.8). Total num frames: 2420736. Throughput: 0: 806.5. Samples: 604152. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:31:12,952][00517] Avg episode reward: [(0, '22.751')] +[2023-02-24 23:31:17,944][00517] Fps is (10 sec: 3686.8, 60 sec: 3140.3, 300 sec: 3165.7). Total num frames: 2437120. Throughput: 0: 804.6. Samples: 609908. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:31:17,948][00517] Avg episode reward: [(0, '22.664')] +[2023-02-24 23:31:22,947][00517] Fps is (10 sec: 2866.6, 60 sec: 3140.2, 300 sec: 3165.8). Total num frames: 2449408. Throughput: 0: 769.6. Samples: 613574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:31:22,951][00517] Avg episode reward: [(0, '20.482')] +[2023-02-24 23:31:25,829][12169] Updated weights for policy 0, policy_version 600 (0.0021) +[2023-02-24 23:31:27,948][00517] Fps is (10 sec: 2047.3, 60 sec: 3071.8, 300 sec: 3137.9). Total num frames: 2457600. Throughput: 0: 758.5. Samples: 614994. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:31:27,950][00517] Avg episode reward: [(0, '20.587')] +[2023-02-24 23:31:32,944][00517] Fps is (10 sec: 2048.4, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 2469888. Throughput: 0: 742.3. Samples: 617914. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:31:32,947][00517] Avg episode reward: [(0, '20.228')] +[2023-02-24 23:31:32,966][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000603_2469888.pth... +[2023-02-24 23:31:33,153][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000421_1724416.pth +[2023-02-24 23:31:37,944][00517] Fps is (10 sec: 2868.1, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 2486272. Throughput: 0: 703.0. Samples: 622150. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:31:37,950][00517] Avg episode reward: [(0, '19.768')] +[2023-02-24 23:31:40,961][12169] Updated weights for policy 0, policy_version 610 (0.0019) +[2023-02-24 23:31:42,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.9, 300 sec: 3124.1). Total num frames: 2502656. Throughput: 0: 702.4. Samples: 625114. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:31:42,953][00517] Avg episode reward: [(0, '20.701')] +[2023-02-24 23:31:47,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.8, 300 sec: 3124.1). Total num frames: 2519040. Throughput: 0: 727.0. Samples: 630330. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:31:47,947][00517] Avg episode reward: [(0, '20.837')] +[2023-02-24 23:31:52,944][00517] Fps is (10 sec: 2867.2, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 2531328. Throughput: 0: 726.2. Samples: 634086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:31:52,948][00517] Avg episode reward: [(0, '21.625')] +[2023-02-24 23:31:54,943][12169] Updated weights for policy 0, policy_version 620 (0.0020) +[2023-02-24 23:31:57,944][00517] Fps is (10 sec: 2867.2, 60 sec: 2867.4, 300 sec: 3096.3). Total num frames: 2547712. Throughput: 0: 707.1. Samples: 635970. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:31:57,948][00517] Avg episode reward: [(0, '21.738')] +[2023-02-24 23:32:02,944][00517] Fps is (10 sec: 3276.8, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 2564096. Throughput: 0: 706.8. Samples: 641712. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:32:02,954][00517] Avg episode reward: [(0, '21.178')] +[2023-02-24 23:32:06,257][12169] Updated weights for policy 0, policy_version 630 (0.0016) +[2023-02-24 23:32:07,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3072.1, 300 sec: 3138.0). Total num frames: 2584576. Throughput: 0: 737.5. Samples: 646762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:32:07,950][00517] Avg episode reward: [(0, '22.238')] +[2023-02-24 23:32:12,945][00517] Fps is (10 sec: 3276.8, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 2596864. Throughput: 0: 749.0. Samples: 648696. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:32:12,952][00517] Avg episode reward: [(0, '23.087')] +[2023-02-24 23:32:12,962][12151] Saving new best policy, reward=23.087! +[2023-02-24 23:32:17,944][00517] Fps is (10 sec: 2457.6, 60 sec: 2867.2, 300 sec: 3096.3). Total num frames: 2609152. Throughput: 0: 770.4. Samples: 652580. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:32:17,949][00517] Avg episode reward: [(0, '21.963')] +[2023-02-24 23:32:20,131][12169] Updated weights for policy 0, policy_version 640 (0.0037) +[2023-02-24 23:32:22,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.8, 300 sec: 3110.2). Total num frames: 2629632. Throughput: 0: 810.4. Samples: 658618. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:32:22,947][00517] Avg episode reward: [(0, '21.819')] +[2023-02-24 23:32:27,948][00517] Fps is (10 sec: 3685.2, 60 sec: 3140.3, 300 sec: 3124.0). Total num frames: 2646016. Throughput: 0: 812.2. Samples: 661666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:32:27,955][00517] Avg episode reward: [(0, '21.715')] +[2023-02-24 23:32:32,862][12169] Updated weights for policy 0, policy_version 650 (0.0030) +[2023-02-24 23:32:32,945][00517] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 2662400. Throughput: 0: 785.6. Samples: 665682. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:32:32,951][00517] Avg episode reward: [(0, '22.738')] +[2023-02-24 23:32:37,944][00517] Fps is (10 sec: 2868.1, 60 sec: 3140.3, 300 sec: 3096.3). Total num frames: 2674688. Throughput: 0: 788.5. Samples: 669568. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:32:37,946][00517] Avg episode reward: [(0, '23.387')] +[2023-02-24 23:32:37,952][12151] Saving new best policy, reward=23.387! +[2023-02-24 23:32:42,945][00517] Fps is (10 sec: 3276.7, 60 sec: 3208.5, 300 sec: 3110.2). Total num frames: 2695168. Throughput: 0: 812.5. Samples: 672532. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:32:42,948][00517] Avg episode reward: [(0, '21.877')] +[2023-02-24 23:32:44,717][12169] Updated weights for policy 0, policy_version 660 (0.0022) +[2023-02-24 23:32:47,948][00517] Fps is (10 sec: 4094.7, 60 sec: 3276.6, 300 sec: 3137.9). Total num frames: 2715648. Throughput: 0: 819.1. Samples: 678574. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:32:47,951][00517] Avg episode reward: [(0, '21.816')] +[2023-02-24 23:32:52,944][00517] Fps is (10 sec: 3276.9, 60 sec: 3276.8, 300 sec: 3124.1). Total num frames: 2727936. Throughput: 0: 802.6. Samples: 682880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:32:52,948][00517] Avg episode reward: [(0, '21.847')] +[2023-02-24 23:32:57,944][00517] Fps is (10 sec: 2458.4, 60 sec: 3208.5, 300 sec: 3096.3). Total num frames: 2740224. Throughput: 0: 801.6. Samples: 684766. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:32:57,954][00517] Avg episode reward: [(0, '21.378')] +[2023-02-24 23:32:58,707][12169] Updated weights for policy 0, policy_version 670 (0.0017) +[2023-02-24 23:33:02,945][00517] Fps is (10 sec: 2867.1, 60 sec: 3208.5, 300 sec: 3096.3). Total num frames: 2756608. Throughput: 0: 823.3. Samples: 689630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:33:02,947][00517] Avg episode reward: [(0, '20.619')] +[2023-02-24 23:33:07,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 2777088. Throughput: 0: 823.3. Samples: 695666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:33:07,947][00517] Avg episode reward: [(0, '20.653')] +[2023-02-24 23:33:09,134][12169] Updated weights for policy 0, policy_version 680 (0.0033) +[2023-02-24 23:33:12,944][00517] Fps is (10 sec: 3686.5, 60 sec: 3276.8, 300 sec: 3151.9). Total num frames: 2793472. Throughput: 0: 806.5. Samples: 697954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:33:12,953][00517] Avg episode reward: [(0, '21.152')] +[2023-02-24 23:33:17,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3151.8). Total num frames: 2805760. Throughput: 0: 802.8. Samples: 701806. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:33:17,956][00517] Avg episode reward: [(0, '21.321')] +[2023-02-24 23:33:22,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3138.0). Total num frames: 2822144. Throughput: 0: 822.4. Samples: 706574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:33:22,946][00517] Avg episode reward: [(0, '22.978')] +[2023-02-24 23:33:23,697][12169] Updated weights for policy 0, policy_version 690 (0.0054) +[2023-02-24 23:33:27,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3277.0, 300 sec: 3165.8). Total num frames: 2842624. Throughput: 0: 821.2. Samples: 709484. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:33:27,948][00517] Avg episode reward: [(0, '23.026')] +[2023-02-24 23:33:32,945][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 2859008. Throughput: 0: 806.3. Samples: 714854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:33:32,948][00517] Avg episode reward: [(0, '22.444')] +[2023-02-24 23:33:32,960][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000698_2859008.pth... +[2023-02-24 23:33:33,102][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000514_2105344.pth +[2023-02-24 23:33:35,877][12169] Updated weights for policy 0, policy_version 700 (0.0021) +[2023-02-24 23:33:37,949][00517] Fps is (10 sec: 2866.0, 60 sec: 3276.6, 300 sec: 3165.7). Total num frames: 2871296. Throughput: 0: 792.9. Samples: 718564. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:33:37,953][00517] Avg episode reward: [(0, '22.295')] +[2023-02-24 23:33:42,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3140.3, 300 sec: 3138.0). Total num frames: 2883584. Throughput: 0: 793.5. Samples: 720474. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:33:42,946][00517] Avg episode reward: [(0, '22.682')] +[2023-02-24 23:33:47,944][00517] Fps is (10 sec: 3278.2, 60 sec: 3140.4, 300 sec: 3151.8). Total num frames: 2904064. Throughput: 0: 816.2. Samples: 726358. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:33:47,949][00517] Avg episode reward: [(0, '22.663')] +[2023-02-24 23:33:48,121][12169] Updated weights for policy 0, policy_version 710 (0.0016) +[2023-02-24 23:33:52,944][00517] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3165.7). Total num frames: 2924544. Throughput: 0: 805.7. Samples: 731924. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:33:52,952][00517] Avg episode reward: [(0, '23.220')] +[2023-02-24 23:33:57,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3165.7). Total num frames: 2936832. Throughput: 0: 797.2. Samples: 733828. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:33:57,955][00517] Avg episode reward: [(0, '23.493')] +[2023-02-24 23:33:57,964][12151] Saving new best policy, reward=23.493! +[2023-02-24 23:34:02,319][12169] Updated weights for policy 0, policy_version 720 (0.0014) +[2023-02-24 23:34:02,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3138.0). Total num frames: 2949120. Throughput: 0: 794.8. Samples: 737570. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:34:02,948][00517] Avg episode reward: [(0, '22.302')] +[2023-02-24 23:34:07,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 3151.9). Total num frames: 2969600. Throughput: 0: 810.2. Samples: 743034. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:34:07,947][00517] Avg episode reward: [(0, '23.470')] +[2023-02-24 23:34:12,939][12169] Updated weights for policy 0, policy_version 730 (0.0014) +[2023-02-24 23:34:12,944][00517] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 2990080. Throughput: 0: 812.3. Samples: 746036. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:34:12,947][00517] Avg episode reward: [(0, '23.964')] +[2023-02-24 23:34:12,956][12151] Saving new best policy, reward=23.964! +[2023-02-24 23:34:17,945][00517] Fps is (10 sec: 2867.1, 60 sec: 3208.5, 300 sec: 3151.8). Total num frames: 2998272. Throughput: 0: 791.2. Samples: 750456. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:34:17,954][00517] Avg episode reward: [(0, '23.112')] +[2023-02-24 23:34:22,945][00517] Fps is (10 sec: 2048.0, 60 sec: 3140.3, 300 sec: 3124.1). Total num frames: 3010560. Throughput: 0: 793.1. Samples: 754248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:34:22,952][00517] Avg episode reward: [(0, '23.646')] +[2023-02-24 23:34:27,579][12169] Updated weights for policy 0, policy_version 740 (0.0030) +[2023-02-24 23:34:27,944][00517] Fps is (10 sec: 3276.9, 60 sec: 3140.3, 300 sec: 3138.0). Total num frames: 3031040. Throughput: 0: 806.9. Samples: 756786. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:34:27,952][00517] Avg episode reward: [(0, '22.966')] +[2023-02-24 23:34:32,947][00517] Fps is (10 sec: 3276.1, 60 sec: 3071.9, 300 sec: 3137.9). Total num frames: 3043328. Throughput: 0: 770.7. Samples: 761042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:34:32,955][00517] Avg episode reward: [(0, '24.665')] +[2023-02-24 23:34:32,978][12151] Saving new best policy, reward=24.665! +[2023-02-24 23:34:37,945][00517] Fps is (10 sec: 2047.9, 60 sec: 3003.9, 300 sec: 3124.1). Total num frames: 3051520. Throughput: 0: 717.2. Samples: 764200. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:34:37,952][00517] Avg episode reward: [(0, '24.944')] +[2023-02-24 23:34:37,957][12151] Saving new best policy, reward=24.944! +[2023-02-24 23:34:42,949][00517] Fps is (10 sec: 2047.6, 60 sec: 3003.5, 300 sec: 3096.3). Total num frames: 3063808. Throughput: 0: 707.5. Samples: 765670. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:34:42,952][00517] Avg episode reward: [(0, '23.915')] +[2023-02-24 23:34:45,587][12169] Updated weights for policy 0, policy_version 750 (0.0052) +[2023-02-24 23:34:47,944][00517] Fps is (10 sec: 2457.8, 60 sec: 2867.2, 300 sec: 3068.5). Total num frames: 3076096. Throughput: 0: 707.7. Samples: 769416. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:34:47,950][00517] Avg episode reward: [(0, '23.876')] +[2023-02-24 23:34:52,944][00517] Fps is (10 sec: 3278.2, 60 sec: 2867.2, 300 sec: 3096.4). Total num frames: 3096576. Throughput: 0: 715.8. Samples: 775246. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:34:52,947][00517] Avg episode reward: [(0, '24.305')] +[2023-02-24 23:34:56,446][12169] Updated weights for policy 0, policy_version 760 (0.0017) +[2023-02-24 23:34:57,949][00517] Fps is (10 sec: 4094.1, 60 sec: 3003.5, 300 sec: 3124.0). Total num frames: 3117056. Throughput: 0: 716.7. Samples: 778290. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:34:57,958][00517] Avg episode reward: [(0, '23.338')] +[2023-02-24 23:35:02,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 3110.2). Total num frames: 3129344. Throughput: 0: 717.2. Samples: 782732. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:35:02,956][00517] Avg episode reward: [(0, '23.067')] +[2023-02-24 23:35:07,947][00517] Fps is (10 sec: 2458.2, 60 sec: 2867.1, 300 sec: 3082.4). Total num frames: 3141632. Throughput: 0: 718.5. Samples: 786584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:35:07,950][00517] Avg episode reward: [(0, '21.452')] +[2023-02-24 23:35:10,677][12169] Updated weights for policy 0, policy_version 770 (0.0024) +[2023-02-24 23:35:12,945][00517] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 3096.3). Total num frames: 3162112. Throughput: 0: 723.8. Samples: 789356. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:35:12,947][00517] Avg episode reward: [(0, '21.874')] +[2023-02-24 23:35:17,945][00517] Fps is (10 sec: 4096.7, 60 sec: 3072.0, 300 sec: 3124.1). Total num frames: 3182592. Throughput: 0: 764.0. Samples: 795420. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:35:17,955][00517] Avg episode reward: [(0, '21.377')] +[2023-02-24 23:35:21,974][12169] Updated weights for policy 0, policy_version 780 (0.0040) +[2023-02-24 23:35:22,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 3124.1). Total num frames: 3194880. Throughput: 0: 791.7. Samples: 799826. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:35:22,951][00517] Avg episode reward: [(0, '20.464')] +[2023-02-24 23:35:27,944][00517] Fps is (10 sec: 2457.7, 60 sec: 2935.5, 300 sec: 3096.3). Total num frames: 3207168. Throughput: 0: 801.7. Samples: 801742. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:35:27,949][00517] Avg episode reward: [(0, '20.959')] +[2023-02-24 23:35:32,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3003.8, 300 sec: 3096.3). Total num frames: 3223552. Throughput: 0: 822.8. Samples: 806442. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:35:32,946][00517] Avg episode reward: [(0, '22.086')] +[2023-02-24 23:35:33,032][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000788_3227648.pth... +[2023-02-24 23:35:33,164][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000603_2469888.pth +[2023-02-24 23:35:35,163][12169] Updated weights for policy 0, policy_version 790 (0.0016) +[2023-02-24 23:35:37,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.6, 300 sec: 3124.1). Total num frames: 3244032. Throughput: 0: 827.2. Samples: 812472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:35:37,952][00517] Avg episode reward: [(0, '22.616')] +[2023-02-24 23:35:42,947][00517] Fps is (10 sec: 3685.6, 60 sec: 3276.9, 300 sec: 3124.0). Total num frames: 3260416. Throughput: 0: 817.1. Samples: 815056. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:35:42,950][00517] Avg episode reward: [(0, '23.354')] +[2023-02-24 23:35:47,949][00517] Fps is (10 sec: 2865.8, 60 sec: 3276.5, 300 sec: 3110.1). Total num frames: 3272704. Throughput: 0: 802.2. Samples: 818834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:35:47,953][00517] Avg episode reward: [(0, '24.118')] +[2023-02-24 23:35:48,380][12169] Updated weights for policy 0, policy_version 800 (0.0013) +[2023-02-24 23:35:52,944][00517] Fps is (10 sec: 2867.9, 60 sec: 3208.5, 300 sec: 3096.3). Total num frames: 3289088. Throughput: 0: 818.0. Samples: 823390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:35:52,948][00517] Avg episode reward: [(0, '24.197')] +[2023-02-24 23:35:57,944][00517] Fps is (10 sec: 3688.3, 60 sec: 3208.8, 300 sec: 3124.1). Total num frames: 3309568. Throughput: 0: 824.2. Samples: 826446. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:35:57,950][00517] Avg episode reward: [(0, '25.714')] +[2023-02-24 23:35:57,953][12151] Saving new best policy, reward=25.714! +[2023-02-24 23:35:59,555][12169] Updated weights for policy 0, policy_version 810 (0.0025) +[2023-02-24 23:36:02,945][00517] Fps is (10 sec: 3686.3, 60 sec: 3276.8, 300 sec: 3138.0). Total num frames: 3325952. Throughput: 0: 814.5. Samples: 832074. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:36:02,949][00517] Avg episode reward: [(0, '24.766')] +[2023-02-24 23:36:07,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3276.9, 300 sec: 3110.2). Total num frames: 3338240. Throughput: 0: 801.6. Samples: 835898. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:36:07,950][00517] Avg episode reward: [(0, '24.871')] +[2023-02-24 23:36:12,944][00517] Fps is (10 sec: 2867.3, 60 sec: 3208.5, 300 sec: 3110.2). Total num frames: 3354624. Throughput: 0: 800.3. Samples: 837754. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:36:12,955][00517] Avg episode reward: [(0, '24.333')] +[2023-02-24 23:36:14,006][12169] Updated weights for policy 0, policy_version 820 (0.0024) +[2023-02-24 23:36:17,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 3124.1). Total num frames: 3371008. Throughput: 0: 814.1. Samples: 843078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:36:17,951][00517] Avg episode reward: [(0, '23.140')] +[2023-02-24 23:36:22,946][00517] Fps is (10 sec: 3685.9, 60 sec: 3276.7, 300 sec: 3165.7). Total num frames: 3391488. Throughput: 0: 806.1. Samples: 848746. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:36:22,954][00517] Avg episode reward: [(0, '23.457')] +[2023-02-24 23:36:25,604][12169] Updated weights for policy 0, policy_version 830 (0.0013) +[2023-02-24 23:36:27,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3165.7). Total num frames: 3403776. Throughput: 0: 791.3. Samples: 850662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:36:27,947][00517] Avg episode reward: [(0, '21.081')] +[2023-02-24 23:36:32,945][00517] Fps is (10 sec: 2457.9, 60 sec: 3208.5, 300 sec: 3151.8). Total num frames: 3416064. Throughput: 0: 793.2. Samples: 854526. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:36:32,954][00517] Avg episode reward: [(0, '20.418')] +[2023-02-24 23:36:37,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 3436544. Throughput: 0: 810.3. Samples: 859854. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 23:36:37,947][00517] Avg episode reward: [(0, '20.268')] +[2023-02-24 23:36:38,714][12169] Updated weights for policy 0, policy_version 840 (0.0017) +[2023-02-24 23:36:42,944][00517] Fps is (10 sec: 4096.1, 60 sec: 3276.9, 300 sec: 3179.6). Total num frames: 3457024. Throughput: 0: 810.6. Samples: 862924. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:36:42,951][00517] Avg episode reward: [(0, '21.486')] +[2023-02-24 23:36:47,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3277.1, 300 sec: 3179.6). Total num frames: 3469312. Throughput: 0: 794.0. Samples: 867804. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:36:47,947][00517] Avg episode reward: [(0, '21.310')] +[2023-02-24 23:36:51,710][12169] Updated weights for policy 0, policy_version 850 (0.0015) +[2023-02-24 23:36:52,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 3481600. Throughput: 0: 796.4. Samples: 871736. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:36:52,950][00517] Avg episode reward: [(0, '20.397')] +[2023-02-24 23:36:57,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 3179.6). Total num frames: 3502080. Throughput: 0: 806.2. Samples: 874034. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:36:57,946][00517] Avg episode reward: [(0, '20.360')] +[2023-02-24 23:37:02,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 3518464. Throughput: 0: 817.9. Samples: 879884. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:37:02,947][00517] Avg episode reward: [(0, '21.490')] +[2023-02-24 23:37:03,308][12169] Updated weights for policy 0, policy_version 860 (0.0031) +[2023-02-24 23:37:07,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 3534848. Throughput: 0: 797.0. Samples: 884610. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:37:07,950][00517] Avg episode reward: [(0, '22.401')] +[2023-02-24 23:37:12,947][00517] Fps is (10 sec: 2866.4, 60 sec: 3208.4, 300 sec: 3179.6). Total num frames: 3547136. Throughput: 0: 796.0. Samples: 886482. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 23:37:12,950][00517] Avg episode reward: [(0, '22.730')] +[2023-02-24 23:37:17,506][12169] Updated weights for policy 0, policy_version 870 (0.0024) +[2023-02-24 23:37:17,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3165.7). Total num frames: 3563520. Throughput: 0: 805.7. Samples: 890784. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:37:17,954][00517] Avg episode reward: [(0, '23.044')] +[2023-02-24 23:37:22,944][00517] Fps is (10 sec: 3687.4, 60 sec: 3208.6, 300 sec: 3179.6). Total num frames: 3584000. Throughput: 0: 824.1. Samples: 896940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:37:22,948][00517] Avg episode reward: [(0, '23.344')] +[2023-02-24 23:37:27,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 3600384. Throughput: 0: 822.0. Samples: 899912. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:37:27,955][00517] Avg episode reward: [(0, '23.538')] +[2023-02-24 23:37:28,816][12169] Updated weights for policy 0, policy_version 880 (0.0023) +[2023-02-24 23:37:32,945][00517] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 3612672. Throughput: 0: 798.3. Samples: 903726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:37:32,949][00517] Avg episode reward: [(0, '23.840')] +[2023-02-24 23:37:32,962][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000882_3612672.pth... +[2023-02-24 23:37:33,140][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000698_2859008.pth +[2023-02-24 23:37:37,950][00517] Fps is (10 sec: 2456.3, 60 sec: 3140.0, 300 sec: 3151.8). Total num frames: 3624960. Throughput: 0: 781.6. Samples: 906914. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:37:37,953][00517] Avg episode reward: [(0, '23.387')] +[2023-02-24 23:37:42,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 3637248. Throughput: 0: 773.2. Samples: 908828. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:37:42,949][00517] Avg episode reward: [(0, '23.296')] +[2023-02-24 23:37:45,731][12169] Updated weights for policy 0, policy_version 890 (0.0039) +[2023-02-24 23:37:47,944][00517] Fps is (10 sec: 2458.9, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 3649536. Throughput: 0: 735.6. Samples: 912984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:37:47,953][00517] Avg episode reward: [(0, '23.088')] +[2023-02-24 23:37:52,949][00517] Fps is (10 sec: 2866.0, 60 sec: 3071.8, 300 sec: 3137.9). Total num frames: 3665920. Throughput: 0: 723.3. Samples: 917162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:37:52,957][00517] Avg episode reward: [(0, '24.211')] +[2023-02-24 23:37:57,945][00517] Fps is (10 sec: 2867.1, 60 sec: 2935.5, 300 sec: 3124.1). Total num frames: 3678208. Throughput: 0: 724.1. Samples: 919064. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:37:57,949][00517] Avg episode reward: [(0, '24.535')] +[2023-02-24 23:37:59,958][12169] Updated weights for policy 0, policy_version 900 (0.0019) +[2023-02-24 23:38:02,945][00517] Fps is (10 sec: 3278.2, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 3698688. Throughput: 0: 742.3. Samples: 924188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:38:02,948][00517] Avg episode reward: [(0, '23.582')] +[2023-02-24 23:38:07,944][00517] Fps is (10 sec: 4096.1, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 3719168. Throughput: 0: 742.4. Samples: 930346. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:38:07,947][00517] Avg episode reward: [(0, '22.179')] +[2023-02-24 23:38:10,537][12169] Updated weights for policy 0, policy_version 910 (0.0028) +[2023-02-24 23:38:12,946][00517] Fps is (10 sec: 3276.3, 60 sec: 3072.1, 300 sec: 3137.9). Total num frames: 3731456. Throughput: 0: 726.1. Samples: 932586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:38:12,953][00517] Avg episode reward: [(0, '22.069')] +[2023-02-24 23:38:17,944][00517] Fps is (10 sec: 2457.6, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 3743744. Throughput: 0: 727.1. Samples: 936446. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:38:17,951][00517] Avg episode reward: [(0, '21.897')] +[2023-02-24 23:38:22,944][00517] Fps is (10 sec: 2867.7, 60 sec: 2935.5, 300 sec: 3110.2). Total num frames: 3760128. Throughput: 0: 765.6. Samples: 941364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:38:22,953][00517] Avg episode reward: [(0, '20.662')] +[2023-02-24 23:38:24,175][12169] Updated weights for policy 0, policy_version 920 (0.0014) +[2023-02-24 23:38:27,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3003.7, 300 sec: 3124.1). Total num frames: 3780608. Throughput: 0: 790.7. Samples: 944410. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:38:27,953][00517] Avg episode reward: [(0, '21.898')] +[2023-02-24 23:38:32,945][00517] Fps is (10 sec: 3686.1, 60 sec: 3072.0, 300 sec: 3138.0). Total num frames: 3796992. Throughput: 0: 821.9. Samples: 949972. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-24 23:38:32,949][00517] Avg episode reward: [(0, '22.931')] +[2023-02-24 23:38:36,535][12169] Updated weights for policy 0, policy_version 930 (0.0032) +[2023-02-24 23:38:37,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3072.3, 300 sec: 3138.0). Total num frames: 3809280. Throughput: 0: 815.4. Samples: 953850. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:38:37,952][00517] Avg episode reward: [(0, '23.505')] +[2023-02-24 23:38:42,944][00517] Fps is (10 sec: 2867.5, 60 sec: 3140.3, 300 sec: 3124.1). Total num frames: 3825664. Throughput: 0: 814.5. Samples: 955718. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:38:42,954][00517] Avg episode reward: [(0, '25.089')] +[2023-02-24 23:38:47,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3124.1). Total num frames: 3846144. Throughput: 0: 829.7. Samples: 961524. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:38:47,948][00517] Avg episode reward: [(0, '26.503')] +[2023-02-24 23:38:47,955][12151] Saving new best policy, reward=26.503! +[2023-02-24 23:38:48,712][12169] Updated weights for policy 0, policy_version 940 (0.0025) +[2023-02-24 23:38:52,944][00517] Fps is (10 sec: 3686.4, 60 sec: 3277.0, 300 sec: 3138.0). Total num frames: 3862528. Throughput: 0: 818.2. Samples: 967166. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-24 23:38:52,952][00517] Avg episode reward: [(0, '28.992')] +[2023-02-24 23:38:52,973][12151] Saving new best policy, reward=28.992! +[2023-02-24 23:38:57,944][00517] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3151.8). Total num frames: 3878912. Throughput: 0: 809.0. Samples: 968988. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 23:38:57,947][00517] Avg episode reward: [(0, '28.548')] +[2023-02-24 23:39:02,427][12169] Updated weights for policy 0, policy_version 950 (0.0019) +[2023-02-24 23:39:02,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 3891200. Throughput: 0: 809.6. Samples: 972880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 23:39:02,947][00517] Avg episode reward: [(0, '28.078')] +[2023-02-24 23:39:07,945][00517] Fps is (10 sec: 3276.7, 60 sec: 3208.5, 300 sec: 3124.1). Total num frames: 3911680. Throughput: 0: 828.8. Samples: 978660. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:39:07,948][00517] Avg episode reward: [(0, '26.877')] +[2023-02-24 23:39:12,950][00517] Fps is (10 sec: 3684.4, 60 sec: 3276.6, 300 sec: 3151.8). Total num frames: 3928064. Throughput: 0: 827.3. Samples: 981644. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 23:39:12,957][00517] Avg episode reward: [(0, '26.340')] +[2023-02-24 23:39:13,075][12169] Updated weights for policy 0, policy_version 960 (0.0030) +[2023-02-24 23:39:17,944][00517] Fps is (10 sec: 3276.9, 60 sec: 3345.1, 300 sec: 3165.7). Total num frames: 3944448. Throughput: 0: 804.1. Samples: 986158. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:39:17,950][00517] Avg episode reward: [(0, '25.087')] +[2023-02-24 23:39:22,945][00517] Fps is (10 sec: 2868.7, 60 sec: 3276.8, 300 sec: 3137.9). Total num frames: 3956736. Throughput: 0: 806.3. Samples: 990132. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:39:22,947][00517] Avg episode reward: [(0, '23.775')] +[2023-02-24 23:39:26,815][12169] Updated weights for policy 0, policy_version 970 (0.0026) +[2023-02-24 23:39:27,944][00517] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3151.9). Total num frames: 3973120. Throughput: 0: 825.6. Samples: 992872. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 23:39:27,952][00517] Avg episode reward: [(0, '22.337')] +[2023-02-24 23:39:32,944][00517] Fps is (10 sec: 4096.1, 60 sec: 3345.1, 300 sec: 3207.4). Total num frames: 3997696. Throughput: 0: 834.1. Samples: 999060. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 23:39:32,951][00517] Avg episode reward: [(0, '23.931')] +[2023-02-24 23:39:32,968][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000976_3997696.pth... +[2023-02-24 23:39:33,114][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000788_3227648.pth +[2023-02-24 23:39:35,363][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-24 23:39:35,383][00517] Component Batcher_0 stopped! +[2023-02-24 23:39:35,373][12151] Stopping Batcher_0... +[2023-02-24 23:39:35,411][12151] Loop batcher_evt_loop terminating... +[2023-02-24 23:39:35,450][00517] Component RolloutWorker_w1 stopped! +[2023-02-24 23:39:35,482][00517] Component RolloutWorker_w3 stopped! +[2023-02-24 23:39:35,486][12174] Stopping RolloutWorker_w3... +[2023-02-24 23:39:35,487][12174] Loop rollout_proc3_evt_loop terminating... +[2023-02-24 23:39:35,490][00517] Component RolloutWorker_w7 stopped! +[2023-02-24 23:39:35,494][12177] Stopping RolloutWorker_w7... +[2023-02-24 23:39:35,495][12177] Loop rollout_proc7_evt_loop terminating... +[2023-02-24 23:39:35,454][12171] Stopping RolloutWorker_w1... +[2023-02-24 23:39:35,497][12171] Loop rollout_proc1_evt_loop terminating... +[2023-02-24 23:39:35,507][12169] Weights refcount: 2 0 +[2023-02-24 23:39:35,514][00517] Component InferenceWorker_p0-w0 stopped! +[2023-02-24 23:39:35,517][12169] Stopping InferenceWorker_p0-w0... +[2023-02-24 23:39:35,518][12169] Loop inference_proc0-0_evt_loop terminating... +[2023-02-24 23:39:35,592][00517] Component RolloutWorker_w4 stopped! +[2023-02-24 23:39:35,597][12175] Stopping RolloutWorker_w5... +[2023-02-24 23:39:35,599][12175] Loop rollout_proc5_evt_loop terminating... +[2023-02-24 23:39:35,600][00517] Component RolloutWorker_w5 stopped! +[2023-02-24 23:39:35,592][12173] Stopping RolloutWorker_w4... +[2023-02-24 23:39:35,615][12176] Stopping RolloutWorker_w6... +[2023-02-24 23:39:35,616][12176] Loop rollout_proc6_evt_loop terminating... +[2023-02-24 23:39:35,615][00517] Component RolloutWorker_w6 stopped! +[2023-02-24 23:39:35,614][12173] Loop rollout_proc4_evt_loop terminating... +[2023-02-24 23:39:35,681][00517] Component RolloutWorker_w0 stopped! +[2023-02-24 23:39:35,686][12170] Stopping RolloutWorker_w0... +[2023-02-24 23:39:35,687][12170] Loop rollout_proc0_evt_loop terminating... +[2023-02-24 23:39:35,778][00517] Component RolloutWorker_w2 stopped! +[2023-02-24 23:39:35,781][12172] Stopping RolloutWorker_w2... +[2023-02-24 23:39:35,782][12172] Loop rollout_proc2_evt_loop terminating... +[2023-02-24 23:39:35,800][12151] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000882_3612672.pth +[2023-02-24 23:39:35,832][12151] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-24 23:39:36,076][00517] Component LearnerWorker_p0 stopped! +[2023-02-24 23:39:36,082][00517] Waiting for process learner_proc0 to stop... +[2023-02-24 23:39:36,084][12151] Stopping LearnerWorker_p0... +[2023-02-24 23:39:36,084][12151] Loop learner_proc0_evt_loop terminating... +[2023-02-24 23:39:38,478][00517] Waiting for process inference_proc0-0 to join... +[2023-02-24 23:39:39,184][00517] Waiting for process rollout_proc0 to join... +[2023-02-24 23:39:40,011][00517] Waiting for process rollout_proc1 to join... +[2023-02-24 23:39:40,017][00517] Waiting for process rollout_proc2 to join... +[2023-02-24 23:39:40,018][00517] Waiting for process rollout_proc3 to join... +[2023-02-24 23:39:40,022][00517] Waiting for process rollout_proc4 to join... +[2023-02-24 23:39:40,024][00517] Waiting for process rollout_proc5 to join... +[2023-02-24 23:39:40,025][00517] Waiting for process rollout_proc6 to join... +[2023-02-24 23:39:40,028][00517] Waiting for process rollout_proc7 to join... +[2023-02-24 23:39:40,029][00517] Batcher 0 profile tree view: +batching: 27.9718, releasing_batches: 0.0399 +[2023-02-24 23:39:40,031][00517] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0208 + wait_policy_total: 624.7956 +update_model: 8.6746 + weight_update: 0.0019 +one_step: 0.0050 + handle_policy_step: 607.3502 + deserialize: 16.9076, stack: 3.4152, obs_to_device_normalize: 127.7088, forward: 302.7622, send_messages: 30.5260 + prepare_outputs: 95.7165 + to_cpu: 59.1457 +[2023-02-24 23:39:40,034][00517] Learner 0 profile tree view: +misc: 0.0137, prepare_batch: 16.7427 +train: 78.1201 + epoch_init: 0.0085, minibatch_init: 0.0066, losses_postprocess: 0.5775, kl_divergence: 0.6239, after_optimizer: 32.7876 + calculate_losses: 27.7755 + losses_init: 0.0040, forward_head: 1.8303, bptt_initial: 18.0967, tail: 1.3685, advantages_returns: 0.3260, losses: 3.2906 + bptt: 2.4813 + bptt_forward_core: 2.3633 + update: 15.6071 + clip: 1.5381 +[2023-02-24 23:39:40,036][00517] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3819, enqueue_policy_requests: 185.3731, env_step: 950.8473, overhead: 27.3429, complete_rollouts: 8.2684 +save_policy_outputs: 26.2566 + split_output_tensors: 12.8579 +[2023-02-24 23:39:40,037][00517] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4261, enqueue_policy_requests: 187.2035, env_step: 950.9613, overhead: 27.0608, complete_rollouts: 7.7175 +save_policy_outputs: 25.3300 + split_output_tensors: 12.3203 +[2023-02-24 23:39:40,039][00517] Loop Runner_EvtLoop terminating... +[2023-02-24 23:39:40,045][00517] Runner profile tree view: +main_loop: 1319.4659 +[2023-02-24 23:39:40,046][00517] Collected {0: 4005888}, FPS: 3036.0 +[2023-02-24 23:40:01,810][00517] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-24 23:40:01,813][00517] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-24 23:40:01,817][00517] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-24 23:40:01,819][00517] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-24 23:40:01,821][00517] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-24 23:40:01,823][00517] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-24 23:40:01,825][00517] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-24 23:40:01,827][00517] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-24 23:40:01,829][00517] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-24 23:40:01,831][00517] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-24 23:40:01,833][00517] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-24 23:40:01,834][00517] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-24 23:40:01,836][00517] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-24 23:40:01,838][00517] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-24 23:40:01,839][00517] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-24 23:40:01,866][00517] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 23:40:01,870][00517] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 23:40:01,872][00517] RunningMeanStd input shape: (1,) +[2023-02-24 23:40:01,890][00517] ConvEncoder: input_channels=3 +[2023-02-24 23:40:02,551][00517] Conv encoder output size: 512 +[2023-02-24 23:40:02,553][00517] Policy head output size: 512 +[2023-02-24 23:40:05,694][00517] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-24 23:40:07,234][00517] Num frames 100... +[2023-02-24 23:40:07,349][00517] Num frames 200... +[2023-02-24 23:40:07,464][00517] Num frames 300... +[2023-02-24 23:40:07,582][00517] Num frames 400... +[2023-02-24 23:40:07,702][00517] Num frames 500... +[2023-02-24 23:40:07,818][00517] Num frames 600... +[2023-02-24 23:40:07,946][00517] Num frames 700... +[2023-02-24 23:40:08,063][00517] Num frames 800... +[2023-02-24 23:40:08,186][00517] Num frames 900... +[2023-02-24 23:40:08,304][00517] Num frames 1000... +[2023-02-24 23:40:08,426][00517] Num frames 1100... +[2023-02-24 23:40:08,545][00517] Num frames 1200... +[2023-02-24 23:40:08,667][00517] Num frames 1300... +[2023-02-24 23:40:08,786][00517] Num frames 1400... +[2023-02-24 23:40:08,915][00517] Num frames 1500... +[2023-02-24 23:40:09,033][00517] Num frames 1600... +[2023-02-24 23:40:09,148][00517] Num frames 1700... +[2023-02-24 23:40:09,268][00517] Num frames 1800... +[2023-02-24 23:40:09,388][00517] Num frames 1900... +[2023-02-24 23:40:09,548][00517] Avg episode rewards: #0: 51.889, true rewards: #0: 19.890 +[2023-02-24 23:40:09,553][00517] Avg episode reward: 51.889, avg true_objective: 19.890 +[2023-02-24 23:40:09,570][00517] Num frames 2000... +[2023-02-24 23:40:09,703][00517] Num frames 2100... +[2023-02-24 23:40:09,843][00517] Num frames 2200... +[2023-02-24 23:40:09,979][00517] Num frames 2300... +[2023-02-24 23:40:10,113][00517] Num frames 2400... +[2023-02-24 23:40:10,246][00517] Num frames 2500... +[2023-02-24 23:40:10,361][00517] Num frames 2600... +[2023-02-24 23:40:10,485][00517] Num frames 2700... +[2023-02-24 23:40:10,601][00517] Num frames 2800... +[2023-02-24 23:40:10,721][00517] Num frames 2900... +[2023-02-24 23:40:10,836][00517] Num frames 3000... +[2023-02-24 23:40:10,909][00517] Avg episode rewards: #0: 37.065, true rewards: #0: 15.065 +[2023-02-24 23:40:10,914][00517] Avg episode reward: 37.065, avg true_objective: 15.065 +[2023-02-24 23:40:11,031][00517] Num frames 3100... +[2023-02-24 23:40:11,162][00517] Num frames 3200... +[2023-02-24 23:40:11,281][00517] Num frames 3300... +[2023-02-24 23:40:11,395][00517] Num frames 3400... +[2023-02-24 23:40:11,514][00517] Num frames 3500... +[2023-02-24 23:40:11,629][00517] Num frames 3600... +[2023-02-24 23:40:11,748][00517] Num frames 3700... +[2023-02-24 23:40:11,867][00517] Num frames 3800... +[2023-02-24 23:40:11,997][00517] Num frames 3900... +[2023-02-24 23:40:12,112][00517] Num frames 4000... +[2023-02-24 23:40:12,213][00517] Avg episode rewards: #0: 30.790, true rewards: #0: 13.457 +[2023-02-24 23:40:12,216][00517] Avg episode reward: 30.790, avg true_objective: 13.457 +[2023-02-24 23:40:12,292][00517] Num frames 4100... +[2023-02-24 23:40:12,411][00517] Num frames 4200... +[2023-02-24 23:40:12,529][00517] Num frames 4300... +[2023-02-24 23:40:12,649][00517] Num frames 4400... +[2023-02-24 23:40:12,765][00517] Num frames 4500... +[2023-02-24 23:40:12,888][00517] Num frames 4600... +[2023-02-24 23:40:13,011][00517] Num frames 4700... +[2023-02-24 23:40:13,120][00517] Avg episode rewards: #0: 27.352, true rewards: #0: 11.852 +[2023-02-24 23:40:13,122][00517] Avg episode reward: 27.352, avg true_objective: 11.852 +[2023-02-24 23:40:13,202][00517] Num frames 4800... +[2023-02-24 23:40:13,330][00517] Num frames 4900... +[2023-02-24 23:40:13,448][00517] Num frames 5000... +[2023-02-24 23:40:13,568][00517] Num frames 5100... +[2023-02-24 23:40:13,686][00517] Num frames 5200... +[2023-02-24 23:40:13,802][00517] Num frames 5300... +[2023-02-24 23:40:13,923][00517] Num frames 5400... +[2023-02-24 23:40:14,062][00517] Num frames 5500... +[2023-02-24 23:40:14,178][00517] Num frames 5600... +[2023-02-24 23:40:14,296][00517] Num frames 5700... +[2023-02-24 23:40:14,412][00517] Num frames 5800... +[2023-02-24 23:40:14,538][00517] Num frames 5900... +[2023-02-24 23:40:14,653][00517] Num frames 6000... +[2023-02-24 23:40:14,810][00517] Avg episode rewards: #0: 27.370, true rewards: #0: 12.170 +[2023-02-24 23:40:14,812][00517] Avg episode reward: 27.370, avg true_objective: 12.170 +[2023-02-24 23:40:14,835][00517] Num frames 6100... +[2023-02-24 23:40:14,965][00517] Num frames 6200... +[2023-02-24 23:40:15,085][00517] Num frames 6300... +[2023-02-24 23:40:15,208][00517] Num frames 6400... +[2023-02-24 23:40:15,272][00517] Avg episode rewards: #0: 23.508, true rewards: #0: 10.675 +[2023-02-24 23:40:15,273][00517] Avg episode reward: 23.508, avg true_objective: 10.675 +[2023-02-24 23:40:15,407][00517] Num frames 6500... +[2023-02-24 23:40:15,523][00517] Num frames 6600... +[2023-02-24 23:40:15,640][00517] Num frames 6700... +[2023-02-24 23:40:15,756][00517] Num frames 6800... +[2023-02-24 23:40:15,878][00517] Num frames 6900... +[2023-02-24 23:40:15,996][00517] Num frames 7000... +[2023-02-24 23:40:16,121][00517] Num frames 7100... +[2023-02-24 23:40:16,237][00517] Num frames 7200... +[2023-02-24 23:40:16,356][00517] Num frames 7300... +[2023-02-24 23:40:16,492][00517] Num frames 7400... +[2023-02-24 23:40:16,669][00517] Num frames 7500... +[2023-02-24 23:40:16,846][00517] Num frames 7600... +[2023-02-24 23:40:17,025][00517] Num frames 7700... +[2023-02-24 23:40:17,196][00517] Num frames 7800... +[2023-02-24 23:40:17,360][00517] Num frames 7900... +[2023-02-24 23:40:17,527][00517] Num frames 8000... +[2023-02-24 23:40:17,699][00517] Num frames 8100... +[2023-02-24 23:40:17,862][00517] Num frames 8200... +[2023-02-24 23:40:18,025][00517] Num frames 8300... +[2023-02-24 23:40:18,138][00517] Avg episode rewards: #0: 27.178, true rewards: #0: 11.893 +[2023-02-24 23:40:18,141][00517] Avg episode reward: 27.178, avg true_objective: 11.893 +[2023-02-24 23:40:18,275][00517] Num frames 8400... +[2023-02-24 23:40:18,439][00517] Num frames 8500... +[2023-02-24 23:40:18,606][00517] Num frames 8600... +[2023-02-24 23:40:18,787][00517] Num frames 8700... +[2023-02-24 23:40:18,956][00517] Num frames 8800... +[2023-02-24 23:40:19,130][00517] Num frames 8900... +[2023-02-24 23:40:19,298][00517] Num frames 9000... +[2023-02-24 23:40:19,466][00517] Num frames 9100... +[2023-02-24 23:40:19,634][00517] Num frames 9200... +[2023-02-24 23:40:19,814][00517] Avg episode rewards: #0: 26.467, true rewards: #0: 11.592 +[2023-02-24 23:40:19,817][00517] Avg episode reward: 26.467, avg true_objective: 11.592 +[2023-02-24 23:40:19,863][00517] Num frames 9300... +[2023-02-24 23:40:20,026][00517] Num frames 9400... +[2023-02-24 23:40:20,176][00517] Num frames 9500... +[2023-02-24 23:40:20,288][00517] Num frames 9600... +[2023-02-24 23:40:20,402][00517] Num frames 9700... +[2023-02-24 23:40:20,523][00517] Num frames 9800... +[2023-02-24 23:40:20,639][00517] Num frames 9900... +[2023-02-24 23:40:20,756][00517] Num frames 10000... +[2023-02-24 23:40:20,878][00517] Num frames 10100... +[2023-02-24 23:40:21,015][00517] Avg episode rewards: #0: 25.300, true rewards: #0: 11.300 +[2023-02-24 23:40:21,018][00517] Avg episode reward: 25.300, avg true_objective: 11.300 +[2023-02-24 23:40:21,059][00517] Num frames 10200... +[2023-02-24 23:40:21,200][00517] Num frames 10300... +[2023-02-24 23:40:21,314][00517] Num frames 10400... +[2023-02-24 23:40:21,433][00517] Num frames 10500... +[2023-02-24 23:40:21,551][00517] Num frames 10600... +[2023-02-24 23:40:21,680][00517] Num frames 10700... +[2023-02-24 23:40:21,807][00517] Num frames 10800... +[2023-02-24 23:40:21,923][00517] Num frames 10900... +[2023-02-24 23:40:22,048][00517] Num frames 11000... +[2023-02-24 23:40:22,170][00517] Num frames 11100... +[2023-02-24 23:40:22,299][00517] Num frames 11200... +[2023-02-24 23:40:22,415][00517] Num frames 11300... +[2023-02-24 23:40:22,535][00517] Num frames 11400... +[2023-02-24 23:40:22,656][00517] Num frames 11500... +[2023-02-24 23:40:22,771][00517] Num frames 11600... +[2023-02-24 23:40:22,886][00517] Num frames 11700... +[2023-02-24 23:40:23,004][00517] Num frames 11800... +[2023-02-24 23:40:23,121][00517] Num frames 11900... +[2023-02-24 23:40:23,247][00517] Num frames 12000... +[2023-02-24 23:40:23,367][00517] Num frames 12100... +[2023-02-24 23:40:23,451][00517] Avg episode rewards: #0: 27.823, true rewards: #0: 12.123 +[2023-02-24 23:40:23,453][00517] Avg episode reward: 27.823, avg true_objective: 12.123 +[2023-02-24 23:41:48,441][00517] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-25 00:10:19,684][00517] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-25 00:10:19,686][00517] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-25 00:10:19,688][00517] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-25 00:10:19,689][00517] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-25 00:10:19,694][00517] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-25 00:10:19,695][00517] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-25 00:10:19,697][00517] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-25 00:10:19,698][00517] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-25 00:10:19,700][00517] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-25 00:10:19,701][00517] Adding new argument 'hf_repository'='Iamvincent/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-25 00:10:19,703][00517] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-25 00:10:19,704][00517] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-25 00:10:19,706][00517] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-25 00:10:19,708][00517] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-25 00:10:19,709][00517] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-25 00:10:19,740][00517] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 00:10:19,744][00517] RunningMeanStd input shape: (1,) +[2023-02-25 00:10:19,761][00517] ConvEncoder: input_channels=3 +[2023-02-25 00:10:19,824][00517] Conv encoder output size: 512 +[2023-02-25 00:10:19,829][00517] Policy head output size: 512 +[2023-02-25 00:10:19,865][00517] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 00:10:20,573][00517] Num frames 100... +[2023-02-25 00:10:20,753][00517] Num frames 200... +[2023-02-25 00:10:20,922][00517] Num frames 300... +[2023-02-25 00:10:21,091][00517] Num frames 400... +[2023-02-25 00:10:21,261][00517] Num frames 500... +[2023-02-25 00:10:21,430][00517] Num frames 600... +[2023-02-25 00:10:21,597][00517] Num frames 700... +[2023-02-25 00:10:21,787][00517] Num frames 800... +[2023-02-25 00:10:21,999][00517] Avg episode rewards: #0: 15.960, true rewards: #0: 8.960 +[2023-02-25 00:10:22,002][00517] Avg episode reward: 15.960, avg true_objective: 8.960 +[2023-02-25 00:10:22,018][00517] Num frames 900... +[2023-02-25 00:10:22,187][00517] Num frames 1000... +[2023-02-25 00:10:22,365][00517] Num frames 1100... +[2023-02-25 00:10:22,536][00517] Num frames 1200... +[2023-02-25 00:10:22,714][00517] Num frames 1300... +[2023-02-25 00:10:22,924][00517] Num frames 1400... +[2023-02-25 00:10:23,098][00517] Num frames 1500... +[2023-02-25 00:10:23,267][00517] Num frames 1600... +[2023-02-25 00:10:23,476][00517] Num frames 1700... +[2023-02-25 00:10:23,630][00517] Num frames 1800... +[2023-02-25 00:10:23,746][00517] Num frames 1900... +[2023-02-25 00:10:23,880][00517] Num frames 2000... +[2023-02-25 00:10:24,012][00517] Num frames 2100... +[2023-02-25 00:10:24,129][00517] Num frames 2200... +[2023-02-25 00:10:24,249][00517] Num frames 2300... +[2023-02-25 00:10:24,377][00517] Num frames 2400... +[2023-02-25 00:10:24,496][00517] Num frames 2500... +[2023-02-25 00:10:24,616][00517] Num frames 2600... +[2023-02-25 00:10:24,736][00517] Num frames 2700... +[2023-02-25 00:10:24,872][00517] Num frames 2800... +[2023-02-25 00:10:24,991][00517] Num frames 2900... +[2023-02-25 00:10:25,163][00517] Avg episode rewards: #0: 37.979, true rewards: #0: 14.980 +[2023-02-25 00:10:25,166][00517] Avg episode reward: 37.979, avg true_objective: 14.980 +[2023-02-25 00:10:25,176][00517] Num frames 3000... +[2023-02-25 00:10:25,307][00517] Num frames 3100... +[2023-02-25 00:10:25,433][00517] Num frames 3200... +[2023-02-25 00:10:25,563][00517] Num frames 3300... +[2023-02-25 00:10:25,681][00517] Num frames 3400... +[2023-02-25 00:10:25,795][00517] Num frames 3500... +[2023-02-25 00:10:25,920][00517] Num frames 3600... +[2023-02-25 00:10:26,041][00517] Num frames 3700... +[2023-02-25 00:10:26,159][00517] Num frames 3800... +[2023-02-25 00:10:26,296][00517] Num frames 3900... +[2023-02-25 00:10:26,422][00517] Num frames 4000... +[2023-02-25 00:10:26,554][00517] Num frames 4100... +[2023-02-25 00:10:26,676][00517] Num frames 4200... +[2023-02-25 00:10:26,795][00517] Num frames 4300... +[2023-02-25 00:10:26,938][00517] Num frames 4400... +[2023-02-25 00:10:27,059][00517] Num frames 4500... +[2023-02-25 00:10:27,177][00517] Num frames 4600... +[2023-02-25 00:10:27,297][00517] Num frames 4700... +[2023-02-25 00:10:27,417][00517] Num frames 4800... +[2023-02-25 00:10:27,537][00517] Num frames 4900... +[2023-02-25 00:10:27,662][00517] Num frames 5000... +[2023-02-25 00:10:27,744][00517] Avg episode rewards: #0: 43.399, true rewards: #0: 16.733 +[2023-02-25 00:10:27,746][00517] Avg episode reward: 43.399, avg true_objective: 16.733 +[2023-02-25 00:10:27,852][00517] Num frames 5100... +[2023-02-25 00:10:27,999][00517] Num frames 5200... +[2023-02-25 00:10:28,126][00517] Num frames 5300... +[2023-02-25 00:10:28,264][00517] Num frames 5400... +[2023-02-25 00:10:28,384][00517] Num frames 5500... +[2023-02-25 00:10:28,503][00517] Num frames 5600... +[2023-02-25 00:10:28,625][00517] Num frames 5700... +[2023-02-25 00:10:28,747][00517] Num frames 5800... +[2023-02-25 00:10:28,864][00517] Num frames 5900... +[2023-02-25 00:10:28,996][00517] Num frames 6000... +[2023-02-25 00:10:29,114][00517] Num frames 6100... +[2023-02-25 00:10:29,246][00517] Num frames 6200... +[2023-02-25 00:10:29,413][00517] Num frames 6300... +[2023-02-25 00:10:29,535][00517] Num frames 6400... +[2023-02-25 00:10:29,657][00517] Num frames 6500... +[2023-02-25 00:10:29,741][00517] Avg episode rewards: #0: 42.559, true rewards: #0: 16.310 +[2023-02-25 00:10:29,743][00517] Avg episode reward: 42.559, avg true_objective: 16.310 +[2023-02-25 00:10:29,842][00517] Num frames 6600... +[2023-02-25 00:10:29,990][00517] Num frames 6700... +[2023-02-25 00:10:30,109][00517] Num frames 6800... +[2023-02-25 00:10:30,236][00517] Num frames 6900... +[2023-02-25 00:10:30,354][00517] Num frames 7000... +[2023-02-25 00:10:30,478][00517] Num frames 7100... +[2023-02-25 00:10:30,608][00517] Avg episode rewards: #0: 36.928, true rewards: #0: 14.328 +[2023-02-25 00:10:30,612][00517] Avg episode reward: 36.928, avg true_objective: 14.328 +[2023-02-25 00:10:30,662][00517] Num frames 7200... +[2023-02-25 00:10:30,797][00517] Num frames 7300... +[2023-02-25 00:10:30,919][00517] Num frames 7400... +[2023-02-25 00:10:31,044][00517] Num frames 7500... +[2023-02-25 00:10:31,169][00517] Num frames 7600... +[2023-02-25 00:10:31,289][00517] Num frames 7700... +[2023-02-25 00:10:31,412][00517] Num frames 7800... +[2023-02-25 00:10:31,535][00517] Num frames 7900... +[2023-02-25 00:10:31,668][00517] Avg episode rewards: #0: 33.770, true rewards: #0: 13.270 +[2023-02-25 00:10:31,670][00517] Avg episode reward: 33.770, avg true_objective: 13.270 +[2023-02-25 00:10:31,725][00517] Num frames 8000... +[2023-02-25 00:10:31,854][00517] Num frames 8100... +[2023-02-25 00:10:32,004][00517] Num frames 8200... +[2023-02-25 00:10:32,135][00517] Num frames 8300... +[2023-02-25 00:10:32,274][00517] Num frames 8400... +[2023-02-25 00:10:32,416][00517] Num frames 8500... +[2023-02-25 00:10:32,543][00517] Num frames 8600... +[2023-02-25 00:10:32,668][00517] Num frames 8700... +[2023-02-25 00:10:32,793][00517] Num frames 8800... +[2023-02-25 00:10:32,921][00517] Num frames 8900... +[2023-02-25 00:10:33,054][00517] Num frames 9000... +[2023-02-25 00:10:33,183][00517] Num frames 9100... +[2023-02-25 00:10:33,307][00517] Num frames 9200... +[2023-02-25 00:10:33,441][00517] Num frames 9300... +[2023-02-25 00:10:33,564][00517] Num frames 9400... +[2023-02-25 00:10:33,731][00517] Num frames 9500... +[2023-02-25 00:10:33,911][00517] Num frames 9600... +[2023-02-25 00:10:34,099][00517] Num frames 9700... +[2023-02-25 00:10:34,269][00517] Num frames 9800... +[2023-02-25 00:10:34,412][00517] Avg episode rewards: #0: 36.500, true rewards: #0: 14.071 +[2023-02-25 00:10:34,415][00517] Avg episode reward: 36.500, avg true_objective: 14.071 +[2023-02-25 00:10:34,517][00517] Num frames 9900... +[2023-02-25 00:10:34,700][00517] Num frames 10000... +[2023-02-25 00:10:34,865][00517] Num frames 10100... +[2023-02-25 00:10:35,040][00517] Num frames 10200... +[2023-02-25 00:10:35,214][00517] Num frames 10300... +[2023-02-25 00:10:35,391][00517] Num frames 10400... +[2023-02-25 00:10:35,558][00517] Num frames 10500... +[2023-02-25 00:10:35,729][00517] Num frames 10600... +[2023-02-25 00:10:35,897][00517] Num frames 10700... +[2023-02-25 00:10:36,071][00517] Num frames 10800... +[2023-02-25 00:10:36,245][00517] Num frames 10900... +[2023-02-25 00:10:36,417][00517] Num frames 11000... +[2023-02-25 00:10:36,593][00517] Avg episode rewards: #0: 35.207, true rewards: #0: 13.833 +[2023-02-25 00:10:36,596][00517] Avg episode reward: 35.207, avg true_objective: 13.833 +[2023-02-25 00:10:36,667][00517] Num frames 11100... +[2023-02-25 00:10:36,845][00517] Num frames 11200... +[2023-02-25 00:10:37,019][00517] Num frames 11300... +[2023-02-25 00:10:37,194][00517] Num frames 11400... +[2023-02-25 00:10:37,367][00517] Num frames 11500... +[2023-02-25 00:10:37,526][00517] Num frames 11600... +[2023-02-25 00:10:37,646][00517] Num frames 11700... +[2023-02-25 00:10:37,768][00517] Num frames 11800... +[2023-02-25 00:10:37,885][00517] Num frames 11900... +[2023-02-25 00:10:38,003][00517] Num frames 12000... +[2023-02-25 00:10:38,119][00517] Num frames 12100... +[2023-02-25 00:10:38,202][00517] Avg episode rewards: #0: 33.580, true rewards: #0: 13.469 +[2023-02-25 00:10:38,205][00517] Avg episode reward: 33.580, avg true_objective: 13.469 +[2023-02-25 00:10:38,300][00517] Num frames 12200... +[2023-02-25 00:10:38,420][00517] Num frames 12300... +[2023-02-25 00:10:38,543][00517] Num frames 12400... +[2023-02-25 00:10:38,662][00517] Num frames 12500... +[2023-02-25 00:10:38,785][00517] Num frames 12600... +[2023-02-25 00:10:38,906][00517] Num frames 12700... +[2023-02-25 00:10:39,028][00517] Num frames 12800... +[2023-02-25 00:10:39,151][00517] Num frames 12900... +[2023-02-25 00:10:39,280][00517] Num frames 13000... +[2023-02-25 00:10:39,401][00517] Num frames 13100... +[2023-02-25 00:10:39,526][00517] Num frames 13200... +[2023-02-25 00:10:39,647][00517] Num frames 13300... +[2023-02-25 00:10:39,776][00517] Num frames 13400... +[2023-02-25 00:10:39,908][00517] Avg episode rewards: #0: 33.366, true rewards: #0: 13.466 +[2023-02-25 00:10:39,910][00517] Avg episode reward: 33.366, avg true_objective: 13.466 +[2023-02-25 00:12:15,718][00517] Replay video saved to /content/train_dir/default_experiment/replay.mp4!