diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1240 @@ +[2023-02-25 17:35:24,283][14251] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-25 17:35:24,288][14251] Rollout worker 0 uses device cpu +[2023-02-25 17:35:24,289][14251] Rollout worker 1 uses device cpu +[2023-02-25 17:35:24,290][14251] Rollout worker 2 uses device cpu +[2023-02-25 17:35:24,291][14251] Rollout worker 3 uses device cpu +[2023-02-25 17:35:24,293][14251] Rollout worker 4 uses device cpu +[2023-02-25 17:35:24,294][14251] Rollout worker 5 uses device cpu +[2023-02-25 17:35:24,295][14251] Rollout worker 6 uses device cpu +[2023-02-25 17:35:24,296][14251] Rollout worker 7 uses device cpu +[2023-02-25 17:35:24,490][14251] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 17:35:24,495][14251] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-25 17:35:24,527][14251] Starting all processes... +[2023-02-25 17:35:24,529][14251] Starting process learner_proc0 +[2023-02-25 17:35:24,594][14251] Starting all processes... +[2023-02-25 17:35:24,605][14251] Starting process inference_proc0-0 +[2023-02-25 17:35:24,605][14251] Starting process rollout_proc0 +[2023-02-25 17:35:24,607][14251] Starting process rollout_proc1 +[2023-02-25 17:35:24,607][14251] Starting process rollout_proc2 +[2023-02-25 17:35:24,607][14251] Starting process rollout_proc3 +[2023-02-25 17:35:24,607][14251] Starting process rollout_proc4 +[2023-02-25 17:35:24,607][14251] Starting process rollout_proc5 +[2023-02-25 17:35:24,607][14251] Starting process rollout_proc6 +[2023-02-25 17:35:24,608][14251] Starting process rollout_proc7 +[2023-02-25 17:35:35,445][14390] Worker 1 uses CPU cores [1] +[2023-02-25 17:35:36,206][14388] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 17:35:36,212][14388] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-25 17:35:36,222][14397] Worker 3 uses CPU cores [1] +[2023-02-25 17:35:36,416][14389] Worker 0 uses CPU cores [0] +[2023-02-25 17:35:36,439][14374] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 17:35:36,439][14374] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-25 17:35:36,443][14395] Worker 2 uses CPU cores [0] +[2023-02-25 17:35:36,451][14398] Worker 5 uses CPU cores [1] +[2023-02-25 17:35:36,523][14396] Worker 4 uses CPU cores [0] +[2023-02-25 17:35:36,534][14400] Worker 6 uses CPU cores [0] +[2023-02-25 17:35:36,571][14399] Worker 7 uses CPU cores [1] +[2023-02-25 17:35:37,028][14388] Num visible devices: 1 +[2023-02-25 17:35:37,030][14374] Num visible devices: 1 +[2023-02-25 17:35:37,045][14374] Starting seed is not provided +[2023-02-25 17:35:37,045][14374] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 17:35:37,046][14374] Initializing actor-critic model on device cuda:0 +[2023-02-25 17:35:37,046][14374] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 17:35:37,048][14374] RunningMeanStd input shape: (1,) +[2023-02-25 17:35:37,062][14374] ConvEncoder: input_channels=3 +[2023-02-25 17:35:37,344][14374] Conv encoder output size: 512 +[2023-02-25 17:35:37,344][14374] Policy head output size: 512 +[2023-02-25 17:35:37,393][14374] Created Actor Critic model with architecture: +[2023-02-25 17:35:37,393][14374] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-25 17:35:44,484][14251] Heartbeat connected on Batcher_0 +[2023-02-25 17:35:44,491][14251] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-25 17:35:44,504][14251] Heartbeat connected on RolloutWorker_w0 +[2023-02-25 17:35:44,507][14251] Heartbeat connected on RolloutWorker_w1 +[2023-02-25 17:35:44,510][14251] Heartbeat connected on RolloutWorker_w2 +[2023-02-25 17:35:44,514][14251] Heartbeat connected on RolloutWorker_w3 +[2023-02-25 17:35:44,516][14251] Heartbeat connected on RolloutWorker_w4 +[2023-02-25 17:35:44,520][14251] Heartbeat connected on RolloutWorker_w5 +[2023-02-25 17:35:44,525][14251] Heartbeat connected on RolloutWorker_w6 +[2023-02-25 17:35:44,526][14251] Heartbeat connected on RolloutWorker_w7 +[2023-02-25 17:35:44,721][14374] Using optimizer +[2023-02-25 17:35:44,722][14374] No checkpoints found +[2023-02-25 17:35:44,722][14374] Did not load from checkpoint, starting from scratch! +[2023-02-25 17:35:44,722][14374] Initialized policy 0 weights for model version 0 +[2023-02-25 17:35:44,726][14374] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 17:35:44,733][14374] LearnerWorker_p0 finished initialization! +[2023-02-25 17:35:44,741][14251] Heartbeat connected on LearnerWorker_p0 +[2023-02-25 17:35:44,925][14388] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 17:35:44,927][14388] RunningMeanStd input shape: (1,) +[2023-02-25 17:35:44,939][14388] ConvEncoder: input_channels=3 +[2023-02-25 17:35:45,033][14388] Conv encoder output size: 512 +[2023-02-25 17:35:45,033][14388] Policy head output size: 512 +[2023-02-25 17:35:46,157][14251] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 17:35:48,075][14251] Inference worker 0-0 is ready! +[2023-02-25 17:35:48,077][14251] All inference workers are ready! Signal rollout workers to start! +[2023-02-25 17:35:48,218][14396] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,249][14390] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,254][14397] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,273][14398] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,276][14399] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,305][14389] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,320][14395] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,369][14400] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:35:48,553][14398] VizDoom game.init() threw an exception ViZDoomUnexpectedExitException('Controlled ViZDoom instance exited unexpectedly.'). Terminate process... +[2023-02-25 17:35:48,556][14397] VizDoom game.init() threw an exception ViZDoomUnexpectedExitException('Controlled ViZDoom instance exited unexpectedly.'). Terminate process... +[2023-02-25 17:35:48,556][14397] EvtLoop [rollout_proc3_evt_loop, process=rollout_proc3] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Sampler', signal_name='_inference_workers_initialized'), args=() +Traceback (most recent call last): + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 228, in _game_init + self.game.init() +vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly. + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/usr/local/lib/python3.8/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 150, in init + env_runner.init(self.timing) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 418, in init + self._reset() + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 430, in _reset + observations, info = e.reset(seed=seed) # new way of doing seeding since Gym 0.26.0 + File "/usr/local/lib/python3.8/dist-packages/gym/core.py", line 323, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/utils/make_env.py", line 125, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/utils/make_env.py", line 110, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 30, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/gym/core.py", line 379, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/envs/env_wrappers.py", line 84, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/gym/core.py", line 323, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 51, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 323, in reset + self._ensure_initialized() + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 274, in _ensure_initialized + self.initialize() + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 269, in initialize + self._game_init() + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 244, in _game_init + raise EnvCriticalError() +sample_factory.envs.env_utils.EnvCriticalError +[2023-02-25 17:35:48,561][14397] Unhandled exception in evt loop rollout_proc3_evt_loop +[2023-02-25 17:35:48,554][14398] EvtLoop [rollout_proc5_evt_loop, process=rollout_proc5] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Sampler', signal_name='_inference_workers_initialized'), args=() +Traceback (most recent call last): + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 228, in _game_init + self.game.init() +vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly. + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/usr/local/lib/python3.8/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 150, in init + env_runner.init(self.timing) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 418, in init + self._reset() + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 430, in _reset + observations, info = e.reset(seed=seed) # new way of doing seeding since Gym 0.26.0 + File "/usr/local/lib/python3.8/dist-packages/gym/core.py", line 323, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/utils/make_env.py", line 125, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/algo/utils/make_env.py", line 110, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 30, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/gym/core.py", line 379, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sample_factory/envs/env_wrappers.py", line 84, in reset + obs, info = self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/gym/core.py", line 323, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 51, in reset + return self.env.reset(**kwargs) + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 323, in reset + self._ensure_initialized() + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 274, in _ensure_initialized + self.initialize() + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 269, in initialize + self._game_init() + File "/usr/local/lib/python3.8/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 244, in _game_init + raise EnvCriticalError() +sample_factory.envs.env_utils.EnvCriticalError +[2023-02-25 17:35:48,563][14398] Unhandled exception in evt loop rollout_proc5_evt_loop +[2023-02-25 17:35:49,989][14395] Decorrelating experience for 0 frames... +[2023-02-25 17:35:49,991][14396] Decorrelating experience for 0 frames... +[2023-02-25 17:35:49,992][14400] Decorrelating experience for 0 frames... +[2023-02-25 17:35:50,098][14390] Decorrelating experience for 0 frames... +[2023-02-25 17:35:50,469][14399] Decorrelating experience for 0 frames... +[2023-02-25 17:35:50,905][14390] Decorrelating experience for 32 frames... +[2023-02-25 17:35:51,157][14251] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 17:35:51,416][14395] Decorrelating experience for 32 frames... +[2023-02-25 17:35:51,417][14396] Decorrelating experience for 32 frames... +[2023-02-25 17:35:51,419][14389] Decorrelating experience for 0 frames... +[2023-02-25 17:35:51,477][14400] Decorrelating experience for 32 frames... +[2023-02-25 17:35:51,858][14399] Decorrelating experience for 32 frames... +[2023-02-25 17:35:52,562][14390] Decorrelating experience for 64 frames... +[2023-02-25 17:35:52,589][14389] Decorrelating experience for 32 frames... +[2023-02-25 17:35:52,680][14395] Decorrelating experience for 64 frames... +[2023-02-25 17:35:52,703][14399] Decorrelating experience for 64 frames... +[2023-02-25 17:35:52,750][14400] Decorrelating experience for 64 frames... +[2023-02-25 17:35:53,358][14390] Decorrelating experience for 96 frames... +[2023-02-25 17:35:53,459][14396] Decorrelating experience for 64 frames... +[2023-02-25 17:35:53,496][14399] Decorrelating experience for 96 frames... +[2023-02-25 17:35:53,621][14389] Decorrelating experience for 64 frames... +[2023-02-25 17:35:54,413][14395] Decorrelating experience for 96 frames... +[2023-02-25 17:35:54,435][14400] Decorrelating experience for 96 frames... +[2023-02-25 17:35:54,511][14396] Decorrelating experience for 96 frames... +[2023-02-25 17:35:54,895][14389] Decorrelating experience for 96 frames... +[2023-02-25 17:35:56,157][14251] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 17:35:59,667][14374] Signal inference workers to stop experience collection... +[2023-02-25 17:35:59,683][14388] InferenceWorker_p0-w0: stopping experience collection +[2023-02-25 17:36:01,157][14251] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 158.5. Samples: 2378. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 17:36:01,160][14251] Avg episode reward: [(0, '2.490')] +[2023-02-25 17:36:02,212][14374] Signal inference workers to resume experience collection... +[2023-02-25 17:36:02,213][14388] InferenceWorker_p0-w0: resuming experience collection +[2023-02-25 17:36:06,157][14251] Fps is (10 sec: 1638.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 16384. Throughput: 0: 210.8. Samples: 4216. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 17:36:06,159][14251] Avg episode reward: [(0, '3.329')] +[2023-02-25 17:36:11,157][14251] Fps is (10 sec: 3276.9, 60 sec: 1310.7, 300 sec: 1310.7). Total num frames: 32768. Throughput: 0: 274.2. Samples: 6856. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0) +[2023-02-25 17:36:11,164][14251] Avg episode reward: [(0, '3.865')] +[2023-02-25 17:36:12,441][14388] Updated weights for policy 0, policy_version 10 (0.0549) +[2023-02-25 17:36:16,157][14251] Fps is (10 sec: 3686.4, 60 sec: 1774.9, 300 sec: 1774.9). Total num frames: 53248. Throughput: 0: 447.5. Samples: 13424. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:36:16,161][14251] Avg episode reward: [(0, '4.403')] +[2023-02-25 17:36:21,157][14251] Fps is (10 sec: 3686.4, 60 sec: 1989.5, 300 sec: 1989.5). Total num frames: 69632. Throughput: 0: 521.0. Samples: 18234. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:36:21,162][14251] Avg episode reward: [(0, '4.358')] +[2023-02-25 17:36:24,642][14388] Updated weights for policy 0, policy_version 20 (0.0016) +[2023-02-25 17:36:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 2150.4, 300 sec: 2150.4). Total num frames: 86016. Throughput: 0: 509.7. Samples: 20386. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:36:26,163][14251] Avg episode reward: [(0, '4.340')] +[2023-02-25 17:36:31,157][14251] Fps is (10 sec: 3686.4, 60 sec: 2366.6, 300 sec: 2366.6). Total num frames: 106496. Throughput: 0: 577.2. Samples: 25974. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:36:31,164][14251] Avg episode reward: [(0, '4.333')] +[2023-02-25 17:36:31,179][14374] Saving new best policy, reward=4.333! +[2023-02-25 17:36:34,908][14388] Updated weights for policy 0, policy_version 30 (0.0024) +[2023-02-25 17:36:36,157][14251] Fps is (10 sec: 4096.0, 60 sec: 2539.5, 300 sec: 2539.5). Total num frames: 126976. Throughput: 0: 716.8. Samples: 32254. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:36:36,160][14251] Avg episode reward: [(0, '4.287')] +[2023-02-25 17:36:41,157][14251] Fps is (10 sec: 3276.7, 60 sec: 2532.1, 300 sec: 2532.1). Total num frames: 139264. Throughput: 0: 769.2. Samples: 34616. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:36:41,163][14251] Avg episode reward: [(0, '4.398')] +[2023-02-25 17:36:41,175][14374] Saving new best policy, reward=4.398! +[2023-02-25 17:36:46,157][14251] Fps is (10 sec: 2867.2, 60 sec: 2594.1, 300 sec: 2594.1). Total num frames: 155648. Throughput: 0: 813.2. Samples: 38970. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:36:46,164][14251] Avg episode reward: [(0, '4.363')] +[2023-02-25 17:36:47,353][14388] Updated weights for policy 0, policy_version 40 (0.0024) +[2023-02-25 17:36:51,158][14251] Fps is (10 sec: 3686.2, 60 sec: 2935.4, 300 sec: 2709.6). Total num frames: 176128. Throughput: 0: 912.2. Samples: 45266. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:36:51,166][14251] Avg episode reward: [(0, '4.712')] +[2023-02-25 17:36:51,178][14374] Saving new best policy, reward=4.712! +[2023-02-25 17:36:56,159][14251] Fps is (10 sec: 4095.1, 60 sec: 3276.7, 300 sec: 2808.6). Total num frames: 196608. Throughput: 0: 923.5. Samples: 48414. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:36:56,166][14251] Avg episode reward: [(0, '4.645')] +[2023-02-25 17:36:57,703][14388] Updated weights for policy 0, policy_version 50 (0.0015) +[2023-02-25 17:37:01,162][14251] Fps is (10 sec: 3684.8, 60 sec: 3549.6, 300 sec: 2839.7). Total num frames: 212992. Throughput: 0: 880.3. Samples: 53042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:37:01,165][14251] Avg episode reward: [(0, '4.545')] +[2023-02-25 17:37:06,157][14251] Fps is (10 sec: 3277.6, 60 sec: 3549.9, 300 sec: 2867.2). Total num frames: 229376. Throughput: 0: 893.0. Samples: 58420. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:37:06,159][14251] Avg episode reward: [(0, '4.530')] +[2023-02-25 17:37:09,274][14388] Updated weights for policy 0, policy_version 60 (0.0033) +[2023-02-25 17:37:11,157][14251] Fps is (10 sec: 4098.1, 60 sec: 3686.4, 300 sec: 2987.7). Total num frames: 253952. Throughput: 0: 919.6. Samples: 61768. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:37:11,162][14251] Avg episode reward: [(0, '4.448')] +[2023-02-25 17:37:16,157][14251] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3003.7). Total num frames: 270336. Throughput: 0: 931.2. Samples: 67880. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:37:16,160][14251] Avg episode reward: [(0, '4.493')] +[2023-02-25 17:37:21,071][14388] Updated weights for policy 0, policy_version 70 (0.0016) +[2023-02-25 17:37:21,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3018.1). Total num frames: 286720. Throughput: 0: 885.3. Samples: 72094. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:37:21,162][14251] Avg episode reward: [(0, '4.405')] +[2023-02-25 17:37:21,173][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000070_286720.pth... +[2023-02-25 17:37:26,157][14251] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3072.0). Total num frames: 307200. Throughput: 0: 897.0. Samples: 74980. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:37:26,164][14251] Avg episode reward: [(0, '4.504')] +[2023-02-25 17:37:30,968][14388] Updated weights for policy 0, policy_version 80 (0.0016) +[2023-02-25 17:37:31,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3120.8). Total num frames: 327680. Throughput: 0: 942.8. Samples: 81396. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:37:31,160][14251] Avg episode reward: [(0, '4.546')] +[2023-02-25 17:37:36,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3127.9). Total num frames: 344064. Throughput: 0: 911.2. Samples: 86268. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:37:36,162][14251] Avg episode reward: [(0, '4.719')] +[2023-02-25 17:37:36,169][14374] Saving new best policy, reward=4.719! +[2023-02-25 17:37:41,160][14251] Fps is (10 sec: 2866.3, 60 sec: 3618.0, 300 sec: 3098.6). Total num frames: 356352. Throughput: 0: 887.8. Samples: 88366. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:37:41,168][14251] Avg episode reward: [(0, '4.565')] +[2023-02-25 17:37:43,491][14388] Updated weights for policy 0, policy_version 90 (0.0014) +[2023-02-25 17:37:46,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3140.3). Total num frames: 376832. Throughput: 0: 916.3. Samples: 94270. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:37:46,160][14251] Avg episode reward: [(0, '4.408')] +[2023-02-25 17:37:51,157][14251] Fps is (10 sec: 4097.3, 60 sec: 3686.5, 300 sec: 3178.5). Total num frames: 397312. Throughput: 0: 935.3. Samples: 100508. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:37:51,162][14251] Avg episode reward: [(0, '4.520')] +[2023-02-25 17:37:54,297][14388] Updated weights for policy 0, policy_version 100 (0.0019) +[2023-02-25 17:37:56,162][14251] Fps is (10 sec: 3684.5, 60 sec: 3618.0, 300 sec: 3182.2). Total num frames: 413696. Throughput: 0: 907.5. Samples: 102612. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:37:56,165][14251] Avg episode reward: [(0, '4.561')] +[2023-02-25 17:38:01,157][14251] Fps is (10 sec: 3276.7, 60 sec: 3618.4, 300 sec: 3185.8). Total num frames: 430080. Throughput: 0: 875.6. Samples: 107280. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:38:01,160][14251] Avg episode reward: [(0, '4.454')] +[2023-02-25 17:38:05,555][14388] Updated weights for policy 0, policy_version 110 (0.0028) +[2023-02-25 17:38:06,157][14251] Fps is (10 sec: 3688.2, 60 sec: 3686.4, 300 sec: 3218.3). Total num frames: 450560. Throughput: 0: 922.5. Samples: 113606. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 17:38:06,163][14251] Avg episode reward: [(0, '4.267')] +[2023-02-25 17:38:11,157][14251] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3248.6). Total num frames: 471040. Throughput: 0: 929.6. Samples: 116810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:38:11,160][14251] Avg episode reward: [(0, '4.253')] +[2023-02-25 17:38:16,157][14251] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3222.2). Total num frames: 483328. Throughput: 0: 878.8. Samples: 120942. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:38:16,161][14251] Avg episode reward: [(0, '4.448')] +[2023-02-25 17:38:18,154][14388] Updated weights for policy 0, policy_version 120 (0.0014) +[2023-02-25 17:38:21,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3250.4). Total num frames: 503808. Throughput: 0: 894.0. Samples: 126496. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:38:21,160][14251] Avg episode reward: [(0, '4.420')] +[2023-02-25 17:38:26,157][14251] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 524288. Throughput: 0: 919.2. Samples: 129726. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:38:26,162][14251] Avg episode reward: [(0, '4.348')] +[2023-02-25 17:38:27,630][14388] Updated weights for policy 0, policy_version 130 (0.0011) +[2023-02-25 17:38:31,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 540672. Throughput: 0: 913.4. Samples: 135372. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:38:31,163][14251] Avg episode reward: [(0, '4.554')] +[2023-02-25 17:38:36,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 557056. Throughput: 0: 870.0. Samples: 139658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:38:36,159][14251] Avg episode reward: [(0, '4.562')] +[2023-02-25 17:38:40,002][14388] Updated weights for policy 0, policy_version 140 (0.0032) +[2023-02-25 17:38:41,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3300.2). Total num frames: 577536. Throughput: 0: 891.7. Samples: 142732. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:38:41,160][14251] Avg episode reward: [(0, '4.876')] +[2023-02-25 17:38:41,173][14374] Saving new best policy, reward=4.876! +[2023-02-25 17:38:46,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3322.3). Total num frames: 598016. Throughput: 0: 931.1. Samples: 149178. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:38:46,159][14251] Avg episode reward: [(0, '4.878')] +[2023-02-25 17:38:46,169][14374] Saving new best policy, reward=4.878! +[2023-02-25 17:38:51,161][14251] Fps is (10 sec: 3275.5, 60 sec: 3549.6, 300 sec: 3298.9). Total num frames: 610304. Throughput: 0: 894.2. Samples: 153848. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:38:51,164][14251] Avg episode reward: [(0, '4.740')] +[2023-02-25 17:38:51,612][14388] Updated weights for policy 0, policy_version 150 (0.0012) +[2023-02-25 17:38:56,159][14251] Fps is (10 sec: 2866.6, 60 sec: 3550.0, 300 sec: 3298.3). Total num frames: 626688. Throughput: 0: 870.4. Samples: 155980. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:38:56,161][14251] Avg episode reward: [(0, '4.692')] +[2023-02-25 17:39:01,157][14251] Fps is (10 sec: 4097.6, 60 sec: 3686.4, 300 sec: 3339.8). Total num frames: 651264. Throughput: 0: 913.4. Samples: 162046. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:39:01,159][14251] Avg episode reward: [(0, '4.474')] +[2023-02-25 17:39:02,223][14388] Updated weights for policy 0, policy_version 160 (0.0013) +[2023-02-25 17:39:06,157][14251] Fps is (10 sec: 4096.9, 60 sec: 3618.1, 300 sec: 3338.2). Total num frames: 667648. Throughput: 0: 923.2. Samples: 168040. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:39:06,163][14251] Avg episode reward: [(0, '4.365')] +[2023-02-25 17:39:11,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3336.7). Total num frames: 684032. Throughput: 0: 898.5. Samples: 170158. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:39:11,162][14251] Avg episode reward: [(0, '4.363')] +[2023-02-25 17:39:14,781][14388] Updated weights for policy 0, policy_version 170 (0.0021) +[2023-02-25 17:39:16,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3335.3). Total num frames: 700416. Throughput: 0: 877.2. Samples: 174844. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:39:16,165][14251] Avg episode reward: [(0, '4.543')] +[2023-02-25 17:39:21,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3353.0). Total num frames: 720896. Throughput: 0: 925.8. Samples: 181320. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:39:21,159][14251] Avg episode reward: [(0, '4.525')] +[2023-02-25 17:39:21,176][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000176_720896.pth... +[2023-02-25 17:39:24,616][14388] Updated weights for policy 0, policy_version 180 (0.0020) +[2023-02-25 17:39:26,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3351.3). Total num frames: 737280. Throughput: 0: 924.8. Samples: 184346. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:39:26,160][14251] Avg episode reward: [(0, '4.490')] +[2023-02-25 17:39:31,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3349.6). Total num frames: 753664. Throughput: 0: 875.2. Samples: 188560. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:39:31,163][14251] Avg episode reward: [(0, '4.666')] +[2023-02-25 17:39:36,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3348.0). Total num frames: 770048. Throughput: 0: 892.1. Samples: 193988. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:39:36,160][14251] Avg episode reward: [(0, '4.741')] +[2023-02-25 17:39:37,013][14388] Updated weights for policy 0, policy_version 190 (0.0014) +[2023-02-25 17:39:41,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3381.4). Total num frames: 794624. Throughput: 0: 915.7. Samples: 197184. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:39:41,160][14251] Avg episode reward: [(0, '4.565')] +[2023-02-25 17:39:46,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3379.2). Total num frames: 811008. Throughput: 0: 903.0. Samples: 202682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:39:46,159][14251] Avg episode reward: [(0, '4.538')] +[2023-02-25 17:39:48,934][14388] Updated weights for policy 0, policy_version 200 (0.0012) +[2023-02-25 17:39:51,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3550.1, 300 sec: 3360.4). Total num frames: 823296. Throughput: 0: 867.1. Samples: 207060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:39:51,160][14251] Avg episode reward: [(0, '4.621')] +[2023-02-25 17:39:56,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3391.5). Total num frames: 847872. Throughput: 0: 892.6. Samples: 210324. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:39:56,160][14251] Avg episode reward: [(0, '4.817')] +[2023-02-25 17:40:00,257][14388] Updated weights for policy 0, policy_version 210 (0.0017) +[2023-02-25 17:40:01,162][14251] Fps is (10 sec: 3684.5, 60 sec: 3481.3, 300 sec: 3373.1). Total num frames: 860160. Throughput: 0: 900.3. Samples: 215362. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:40:01,165][14251] Avg episode reward: [(0, '4.841')] +[2023-02-25 17:40:06,157][14251] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3355.6). Total num frames: 872448. Throughput: 0: 835.7. Samples: 218928. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:40:06,161][14251] Avg episode reward: [(0, '4.819')] +[2023-02-25 17:40:11,157][14251] Fps is (10 sec: 2458.9, 60 sec: 3345.1, 300 sec: 3338.6). Total num frames: 884736. Throughput: 0: 813.1. Samples: 220936. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:40:11,163][14251] Avg episode reward: [(0, '4.596')] +[2023-02-25 17:40:14,147][14388] Updated weights for policy 0, policy_version 220 (0.0012) +[2023-02-25 17:40:16,157][14251] Fps is (10 sec: 3686.5, 60 sec: 3481.6, 300 sec: 3367.8). Total num frames: 909312. Throughput: 0: 848.5. Samples: 226742. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:40:16,163][14251] Avg episode reward: [(0, '4.695')] +[2023-02-25 17:40:21,157][14251] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3381.1). Total num frames: 929792. Throughput: 0: 871.4. Samples: 233200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:40:21,160][14251] Avg episode reward: [(0, '4.825')] +[2023-02-25 17:40:24,782][14388] Updated weights for policy 0, policy_version 230 (0.0018) +[2023-02-25 17:40:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3364.6). Total num frames: 942080. Throughput: 0: 848.7. Samples: 235374. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:40:26,162][14251] Avg episode reward: [(0, '4.821')] +[2023-02-25 17:40:31,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3363.0). Total num frames: 958464. Throughput: 0: 826.5. Samples: 239876. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:40:31,167][14251] Avg episode reward: [(0, '4.975')] +[2023-02-25 17:40:31,197][14374] Saving new best policy, reward=4.975! +[2023-02-25 17:40:36,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3375.7). Total num frames: 978944. Throughput: 0: 868.2. Samples: 246130. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:40:36,160][14251] Avg episode reward: [(0, '5.434')] +[2023-02-25 17:40:36,178][14374] Saving new best policy, reward=5.434! +[2023-02-25 17:40:36,187][14388] Updated weights for policy 0, policy_version 240 (0.0019) +[2023-02-25 17:40:41,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3387.9). Total num frames: 999424. Throughput: 0: 864.5. Samples: 249226. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:40:41,160][14251] Avg episode reward: [(0, '5.679')] +[2023-02-25 17:40:41,171][14374] Saving new best policy, reward=5.679! +[2023-02-25 17:40:46,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 1011712. Throughput: 0: 848.1. Samples: 253522. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:40:46,163][14251] Avg episode reward: [(0, '5.488')] +[2023-02-25 17:40:48,649][14388] Updated weights for policy 0, policy_version 250 (0.0014) +[2023-02-25 17:40:51,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3499.0). Total num frames: 1032192. Throughput: 0: 891.5. Samples: 259046. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:40:51,160][14251] Avg episode reward: [(0, '5.472')] +[2023-02-25 17:40:56,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3568.4). Total num frames: 1052672. Throughput: 0: 918.5. Samples: 262268. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:40:56,159][14251] Avg episode reward: [(0, '5.457')] +[2023-02-25 17:40:58,411][14388] Updated weights for policy 0, policy_version 260 (0.0015) +[2023-02-25 17:41:01,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3481.9, 300 sec: 3568.4). Total num frames: 1069056. Throughput: 0: 913.6. Samples: 267854. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:41:01,162][14251] Avg episode reward: [(0, '6.141')] +[2023-02-25 17:41:01,254][14374] Saving new best policy, reward=6.141! +[2023-02-25 17:41:06,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 1085440. Throughput: 0: 864.3. Samples: 272094. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:41:06,162][14251] Avg episode reward: [(0, '6.469')] +[2023-02-25 17:41:06,167][14374] Saving new best policy, reward=6.469! +[2023-02-25 17:41:10,779][14388] Updated weights for policy 0, policy_version 270 (0.0017) +[2023-02-25 17:41:11,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 1105920. Throughput: 0: 886.3. Samples: 275258. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:41:11,165][14251] Avg episode reward: [(0, '7.264')] +[2023-02-25 17:41:11,172][14374] Saving new best policy, reward=7.264! +[2023-02-25 17:41:16,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1126400. Throughput: 0: 930.2. Samples: 281736. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:41:16,159][14251] Avg episode reward: [(0, '6.702')] +[2023-02-25 17:41:21,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 1142784. Throughput: 0: 896.6. Samples: 286476. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 17:41:21,160][14251] Avg episode reward: [(0, '6.578')] +[2023-02-25 17:41:21,173][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000279_1142784.pth... +[2023-02-25 17:41:21,321][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000070_286720.pth +[2023-02-25 17:41:21,948][14388] Updated weights for policy 0, policy_version 280 (0.0017) +[2023-02-25 17:41:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 1159168. Throughput: 0: 876.8. Samples: 288682. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:41:26,160][14251] Avg episode reward: [(0, '6.829')] +[2023-02-25 17:41:31,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 1179648. Throughput: 0: 917.6. Samples: 294816. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:41:31,160][14251] Avg episode reward: [(0, '7.093')] +[2023-02-25 17:41:32,574][14388] Updated weights for policy 0, policy_version 290 (0.0015) +[2023-02-25 17:41:36,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 1200128. Throughput: 0: 930.1. Samples: 300900. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:41:36,164][14251] Avg episode reward: [(0, '7.214')] +[2023-02-25 17:41:41,159][14251] Fps is (10 sec: 3276.1, 60 sec: 3549.7, 300 sec: 3582.2). Total num frames: 1212416. Throughput: 0: 907.4. Samples: 303104. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:41:41,162][14251] Avg episode reward: [(0, '7.255')] +[2023-02-25 17:41:44,804][14388] Updated weights for policy 0, policy_version 300 (0.0020) +[2023-02-25 17:41:46,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 1232896. Throughput: 0: 891.8. Samples: 307986. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:41:46,160][14251] Avg episode reward: [(0, '7.521')] +[2023-02-25 17:41:46,162][14374] Saving new best policy, reward=7.521! +[2023-02-25 17:41:51,157][14251] Fps is (10 sec: 4096.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 1253376. Throughput: 0: 938.8. Samples: 314338. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:41:51,164][14251] Avg episode reward: [(0, '7.637')] +[2023-02-25 17:41:51,177][14374] Saving new best policy, reward=7.637! +[2023-02-25 17:41:54,721][14388] Updated weights for policy 0, policy_version 310 (0.0015) +[2023-02-25 17:41:56,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1269760. Throughput: 0: 934.8. Samples: 317322. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:41:56,163][14251] Avg episode reward: [(0, '7.869')] +[2023-02-25 17:41:56,190][14374] Saving new best policy, reward=7.869! +[2023-02-25 17:42:01,157][14251] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1286144. Throughput: 0: 884.7. Samples: 321546. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:42:01,160][14251] Avg episode reward: [(0, '8.031')] +[2023-02-25 17:42:01,175][14374] Saving new best policy, reward=8.031! +[2023-02-25 17:42:06,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 1306624. Throughput: 0: 905.4. Samples: 327220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:42:06,160][14251] Avg episode reward: [(0, '7.731')] +[2023-02-25 17:42:06,972][14388] Updated weights for policy 0, policy_version 320 (0.0024) +[2023-02-25 17:42:11,157][14251] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 1327104. Throughput: 0: 926.9. Samples: 330394. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:42:11,160][14251] Avg episode reward: [(0, '7.270')] +[2023-02-25 17:42:16,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1343488. Throughput: 0: 910.2. Samples: 335776. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:42:16,162][14251] Avg episode reward: [(0, '7.640')] +[2023-02-25 17:42:18,912][14388] Updated weights for policy 0, policy_version 330 (0.0021) +[2023-02-25 17:42:21,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 1359872. Throughput: 0: 874.2. Samples: 340240. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:42:21,160][14251] Avg episode reward: [(0, '7.637')] +[2023-02-25 17:42:26,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 1380352. Throughput: 0: 899.0. Samples: 343558. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:42:26,159][14251] Avg episode reward: [(0, '7.729')] +[2023-02-25 17:42:28,769][14388] Updated weights for policy 0, policy_version 340 (0.0019) +[2023-02-25 17:42:31,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 1400832. Throughput: 0: 934.4. Samples: 350032. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:42:31,159][14251] Avg episode reward: [(0, '7.754')] +[2023-02-25 17:42:36,159][14251] Fps is (10 sec: 3276.1, 60 sec: 3549.7, 300 sec: 3582.3). Total num frames: 1413120. Throughput: 0: 892.7. Samples: 354510. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:42:36,165][14251] Avg episode reward: [(0, '8.667')] +[2023-02-25 17:42:36,169][14374] Saving new best policy, reward=8.667! +[2023-02-25 17:42:41,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3618.3, 300 sec: 3568.4). Total num frames: 1429504. Throughput: 0: 874.0. Samples: 356650. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:42:41,161][14251] Avg episode reward: [(0, '8.765')] +[2023-02-25 17:42:41,244][14374] Saving new best policy, reward=8.765! +[2023-02-25 17:42:41,252][14388] Updated weights for policy 0, policy_version 350 (0.0028) +[2023-02-25 17:42:46,159][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.3, 300 sec: 3582.2). Total num frames: 1454080. Throughput: 0: 920.1. Samples: 362952. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:42:46,161][14251] Avg episode reward: [(0, '9.168')] +[2023-02-25 17:42:46,167][14374] Saving new best policy, reward=9.168! +[2023-02-25 17:42:51,157][14251] Fps is (10 sec: 4095.8, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1470464. Throughput: 0: 918.4. Samples: 368550. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:42:51,160][14251] Avg episode reward: [(0, '8.807')] +[2023-02-25 17:42:51,763][14388] Updated weights for policy 0, policy_version 360 (0.0014) +[2023-02-25 17:42:56,157][14251] Fps is (10 sec: 3277.5, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1486848. Throughput: 0: 895.7. Samples: 370700. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:42:56,159][14251] Avg episode reward: [(0, '8.639')] +[2023-02-25 17:43:01,157][14251] Fps is (10 sec: 3276.9, 60 sec: 3618.2, 300 sec: 3568.4). Total num frames: 1503232. Throughput: 0: 892.5. Samples: 375940. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:43:01,164][14251] Avg episode reward: [(0, '8.249')] +[2023-02-25 17:43:03,188][14388] Updated weights for policy 0, policy_version 370 (0.0014) +[2023-02-25 17:43:06,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 1527808. Throughput: 0: 938.7. Samples: 382482. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:43:06,163][14251] Avg episode reward: [(0, '8.095')] +[2023-02-25 17:43:11,159][14251] Fps is (10 sec: 4095.3, 60 sec: 3618.0, 300 sec: 3596.1). Total num frames: 1544192. Throughput: 0: 923.3. Samples: 385110. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:43:11,161][14251] Avg episode reward: [(0, '8.367')] +[2023-02-25 17:43:15,142][14388] Updated weights for policy 0, policy_version 380 (0.0019) +[2023-02-25 17:43:16,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 1556480. Throughput: 0: 874.4. Samples: 389380. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:43:16,160][14251] Avg episode reward: [(0, '9.579')] +[2023-02-25 17:43:16,162][14374] Saving new best policy, reward=9.579! +[2023-02-25 17:43:21,159][14251] Fps is (10 sec: 3276.7, 60 sec: 3618.0, 300 sec: 3568.4). Total num frames: 1576960. Throughput: 0: 910.4. Samples: 395480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:43:21,161][14251] Avg episode reward: [(0, '9.795')] +[2023-02-25 17:43:21,207][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000386_1581056.pth... +[2023-02-25 17:43:21,332][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000176_720896.pth +[2023-02-25 17:43:21,347][14374] Saving new best policy, reward=9.795! +[2023-02-25 17:43:25,231][14388] Updated weights for policy 0, policy_version 390 (0.0014) +[2023-02-25 17:43:26,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 1597440. Throughput: 0: 929.6. Samples: 398480. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:43:26,160][14251] Avg episode reward: [(0, '11.438')] +[2023-02-25 17:43:26,165][14374] Saving new best policy, reward=11.438! +[2023-02-25 17:43:31,157][14251] Fps is (10 sec: 3687.1, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 1613824. Throughput: 0: 898.5. Samples: 403384. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:43:31,163][14251] Avg episode reward: [(0, '11.230')] +[2023-02-25 17:43:36,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3568.4). Total num frames: 1630208. Throughput: 0: 879.6. Samples: 408130. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:43:36,160][14251] Avg episode reward: [(0, '11.218')] +[2023-02-25 17:43:38,054][14388] Updated weights for policy 0, policy_version 400 (0.0025) +[2023-02-25 17:43:41,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 1650688. Throughput: 0: 901.2. Samples: 411252. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:43:41,163][14251] Avg episode reward: [(0, '11.591')] +[2023-02-25 17:43:41,173][14374] Saving new best policy, reward=11.591! +[2023-02-25 17:43:46,160][14251] Fps is (10 sec: 4094.8, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 1671168. Throughput: 0: 927.2. Samples: 417668. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:43:46,164][14251] Avg episode reward: [(0, '10.279')] +[2023-02-25 17:43:48,933][14388] Updated weights for policy 0, policy_version 410 (0.0015) +[2023-02-25 17:43:51,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 1683456. Throughput: 0: 875.4. Samples: 421874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:43:51,160][14251] Avg episode reward: [(0, '10.105')] +[2023-02-25 17:43:56,157][14251] Fps is (10 sec: 3277.7, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 1703936. Throughput: 0: 872.6. Samples: 424374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:43:56,160][14251] Avg episode reward: [(0, '9.031')] +[2023-02-25 17:43:59,845][14388] Updated weights for policy 0, policy_version 420 (0.0015) +[2023-02-25 17:44:01,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 1724416. Throughput: 0: 921.8. Samples: 430862. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:44:01,162][14251] Avg episode reward: [(0, '9.572')] +[2023-02-25 17:44:06,159][14251] Fps is (10 sec: 3685.9, 60 sec: 3549.8, 300 sec: 3582.2). Total num frames: 1740800. Throughput: 0: 906.5. Samples: 436274. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:44:06,164][14251] Avg episode reward: [(0, '10.606')] +[2023-02-25 17:44:11,158][14251] Fps is (10 sec: 3276.6, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 1757184. Throughput: 0: 885.6. Samples: 438332. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:44:11,166][14251] Avg episode reward: [(0, '11.111')] +[2023-02-25 17:44:12,230][14388] Updated weights for policy 0, policy_version 430 (0.0021) +[2023-02-25 17:44:16,158][14251] Fps is (10 sec: 3686.6, 60 sec: 3686.3, 300 sec: 3582.3). Total num frames: 1777664. Throughput: 0: 901.4. Samples: 443950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:44:16,161][14251] Avg episode reward: [(0, '12.609')] +[2023-02-25 17:44:16,165][14374] Saving new best policy, reward=12.609! +[2023-02-25 17:44:21,157][14251] Fps is (10 sec: 4096.2, 60 sec: 3686.5, 300 sec: 3596.1). Total num frames: 1798144. Throughput: 0: 939.3. Samples: 450400. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:44:21,160][14251] Avg episode reward: [(0, '13.458')] +[2023-02-25 17:44:21,171][14374] Saving new best policy, reward=13.458! +[2023-02-25 17:44:21,574][14388] Updated weights for policy 0, policy_version 440 (0.0014) +[2023-02-25 17:44:26,157][14251] Fps is (10 sec: 3686.8, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 1814528. Throughput: 0: 921.0. Samples: 452696. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:44:26,160][14251] Avg episode reward: [(0, '14.467')] +[2023-02-25 17:44:26,167][14374] Saving new best policy, reward=14.467! +[2023-02-25 17:44:31,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 1830912. Throughput: 0: 872.4. Samples: 456924. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:44:31,160][14251] Avg episode reward: [(0, '14.539')] +[2023-02-25 17:44:31,168][14374] Saving new best policy, reward=14.539! +[2023-02-25 17:44:34,051][14388] Updated weights for policy 0, policy_version 450 (0.0011) +[2023-02-25 17:44:36,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 1847296. Throughput: 0: 921.1. Samples: 463322. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:44:36,163][14251] Avg episode reward: [(0, '14.689')] +[2023-02-25 17:44:36,184][14374] Saving new best policy, reward=14.689! +[2023-02-25 17:44:41,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 1871872. Throughput: 0: 933.6. Samples: 466384. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:44:41,167][14251] Avg episode reward: [(0, '14.785')] +[2023-02-25 17:44:41,184][14374] Saving new best policy, reward=14.785! +[2023-02-25 17:44:45,924][14388] Updated weights for policy 0, policy_version 460 (0.0014) +[2023-02-25 17:44:46,158][14251] Fps is (10 sec: 3685.9, 60 sec: 3550.0, 300 sec: 3596.1). Total num frames: 1884160. Throughput: 0: 892.3. Samples: 471016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:44:46,162][14251] Avg episode reward: [(0, '14.311')] +[2023-02-25 17:44:51,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 1900544. Throughput: 0: 884.0. Samples: 476052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:44:51,160][14251] Avg episode reward: [(0, '14.352')] +[2023-02-25 17:44:56,157][14251] Fps is (10 sec: 3686.8, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 1921024. Throughput: 0: 908.5. Samples: 479212. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:44:56,159][14251] Avg episode reward: [(0, '14.674')] +[2023-02-25 17:44:56,391][14388] Updated weights for policy 0, policy_version 470 (0.0024) +[2023-02-25 17:45:01,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3623.9). Total num frames: 1941504. Throughput: 0: 917.7. Samples: 485246. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 17:45:01,162][14251] Avg episode reward: [(0, '15.398')] +[2023-02-25 17:45:01,174][14374] Saving new best policy, reward=15.398! +[2023-02-25 17:45:06,158][14251] Fps is (10 sec: 3276.5, 60 sec: 3549.9, 300 sec: 3623.9). Total num frames: 1953792. Throughput: 0: 869.8. Samples: 489540. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:45:06,166][14251] Avg episode reward: [(0, '16.083')] +[2023-02-25 17:45:06,168][14374] Saving new best policy, reward=16.083! +[2023-02-25 17:45:08,992][14388] Updated weights for policy 0, policy_version 480 (0.0024) +[2023-02-25 17:45:11,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3610.0). Total num frames: 1974272. Throughput: 0: 878.4. Samples: 492224. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:45:11,160][14251] Avg episode reward: [(0, '16.927')] +[2023-02-25 17:45:11,180][14374] Saving new best policy, reward=16.927! +[2023-02-25 17:45:16,157][14251] Fps is (10 sec: 4096.3, 60 sec: 3618.2, 300 sec: 3610.0). Total num frames: 1994752. Throughput: 0: 927.2. Samples: 498648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:45:16,160][14251] Avg episode reward: [(0, '15.951')] +[2023-02-25 17:45:18,571][14388] Updated weights for policy 0, policy_version 490 (0.0011) +[2023-02-25 17:45:21,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3623.9). Total num frames: 2011136. Throughput: 0: 900.6. Samples: 503850. Policy #0 lag: (min: 0.0, avg: 0.2, max: 2.0) +[2023-02-25 17:45:21,159][14251] Avg episode reward: [(0, '14.877')] +[2023-02-25 17:45:21,183][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000491_2011136.pth... +[2023-02-25 17:45:21,294][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000279_1142784.pth +[2023-02-25 17:45:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3623.9). Total num frames: 2027520. Throughput: 0: 880.5. Samples: 506006. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:45:26,159][14251] Avg episode reward: [(0, '15.275')] +[2023-02-25 17:45:30,874][14388] Updated weights for policy 0, policy_version 500 (0.0012) +[2023-02-25 17:45:31,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3623.9). Total num frames: 2048000. Throughput: 0: 903.0. Samples: 511650. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 17:45:31,160][14251] Avg episode reward: [(0, '15.545')] +[2023-02-25 17:45:36,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 2068480. Throughput: 0: 936.3. Samples: 518184. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:45:36,160][14251] Avg episode reward: [(0, '15.719')] +[2023-02-25 17:45:41,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 2084864. Throughput: 0: 913.6. Samples: 520324. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 17:45:41,165][14251] Avg episode reward: [(0, '16.309')] +[2023-02-25 17:45:42,391][14388] Updated weights for policy 0, policy_version 510 (0.0021) +[2023-02-25 17:45:46,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3623.9). Total num frames: 2101248. Throughput: 0: 876.1. Samples: 524672. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-25 17:45:46,167][14251] Avg episode reward: [(0, '16.029')] +[2023-02-25 17:45:51,159][14251] Fps is (10 sec: 3276.2, 60 sec: 3618.0, 300 sec: 3610.0). Total num frames: 2117632. Throughput: 0: 902.2. Samples: 530138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:45:51,166][14251] Avg episode reward: [(0, '14.339')] +[2023-02-25 17:45:55,160][14388] Updated weights for policy 0, policy_version 520 (0.0036) +[2023-02-25 17:45:56,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3596.1). Total num frames: 2129920. Throughput: 0: 888.7. Samples: 532214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:45:56,165][14251] Avg episode reward: [(0, '13.619')] +[2023-02-25 17:46:01,157][14251] Fps is (10 sec: 2458.0, 60 sec: 3345.1, 300 sec: 3582.3). Total num frames: 2142208. Throughput: 0: 828.5. Samples: 535930. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:01,162][14251] Avg episode reward: [(0, '14.520')] +[2023-02-25 17:46:06,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3568.4). Total num frames: 2158592. Throughput: 0: 822.0. Samples: 540838. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:06,164][14251] Avg episode reward: [(0, '13.881')] +[2023-02-25 17:46:08,418][14388] Updated weights for policy 0, policy_version 530 (0.0013) +[2023-02-25 17:46:11,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3568.4). Total num frames: 2179072. Throughput: 0: 844.8. Samples: 544020. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:11,164][14251] Avg episode reward: [(0, '15.666')] +[2023-02-25 17:46:16,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3582.3). Total num frames: 2199552. Throughput: 0: 854.0. Samples: 550082. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:16,164][14251] Avg episode reward: [(0, '17.340')] +[2023-02-25 17:46:16,170][14374] Saving new best policy, reward=17.340! +[2023-02-25 17:46:19,761][14388] Updated weights for policy 0, policy_version 540 (0.0012) +[2023-02-25 17:46:21,157][14251] Fps is (10 sec: 3276.7, 60 sec: 3345.1, 300 sec: 3568.4). Total num frames: 2211840. Throughput: 0: 799.5. Samples: 554160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:46:21,162][14251] Avg episode reward: [(0, '18.406')] +[2023-02-25 17:46:21,181][14374] Saving new best policy, reward=18.406! +[2023-02-25 17:46:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3568.4). Total num frames: 2232320. Throughput: 0: 806.3. Samples: 556608. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:26,162][14251] Avg episode reward: [(0, '18.411')] +[2023-02-25 17:46:26,166][14374] Saving new best policy, reward=18.411! +[2023-02-25 17:46:30,713][14388] Updated weights for policy 0, policy_version 550 (0.0022) +[2023-02-25 17:46:31,157][14251] Fps is (10 sec: 4096.1, 60 sec: 3413.3, 300 sec: 3568.4). Total num frames: 2252800. Throughput: 0: 851.6. Samples: 562996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:46:31,159][14251] Avg episode reward: [(0, '18.206')] +[2023-02-25 17:46:36,161][14251] Fps is (10 sec: 3684.9, 60 sec: 3344.8, 300 sec: 3582.2). Total num frames: 2269184. Throughput: 0: 847.6. Samples: 568280. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:46:36,168][14251] Avg episode reward: [(0, '18.073')] +[2023-02-25 17:46:41,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3568.4). Total num frames: 2285568. Throughput: 0: 849.1. Samples: 570424. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:46:41,164][14251] Avg episode reward: [(0, '17.453')] +[2023-02-25 17:46:42,931][14388] Updated weights for policy 0, policy_version 560 (0.0013) +[2023-02-25 17:46:46,157][14251] Fps is (10 sec: 3687.9, 60 sec: 3413.3, 300 sec: 3568.4). Total num frames: 2306048. Throughput: 0: 890.0. Samples: 575978. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:46,162][14251] Avg episode reward: [(0, '17.064')] +[2023-02-25 17:46:51,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3481.7, 300 sec: 3582.3). Total num frames: 2326528. Throughput: 0: 926.0. Samples: 582508. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:46:51,161][14251] Avg episode reward: [(0, '17.851')] +[2023-02-25 17:46:53,369][14388] Updated weights for policy 0, policy_version 570 (0.0015) +[2023-02-25 17:46:56,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 2338816. Throughput: 0: 901.4. Samples: 584582. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:46:56,160][14251] Avg episode reward: [(0, '17.956')] +[2023-02-25 17:47:01,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 2355200. Throughput: 0: 864.4. Samples: 588978. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:47:01,160][14251] Avg episode reward: [(0, '19.218')] +[2023-02-25 17:47:01,174][14374] Saving new best policy, reward=19.218! +[2023-02-25 17:47:05,216][14388] Updated weights for policy 0, policy_version 580 (0.0022) +[2023-02-25 17:47:06,157][14251] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 2379776. Throughput: 0: 915.0. Samples: 595336. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:47:06,162][14251] Avg episode reward: [(0, '19.227')] +[2023-02-25 17:47:06,166][14374] Saving new best policy, reward=19.227! +[2023-02-25 17:47:11,162][14251] Fps is (10 sec: 4094.1, 60 sec: 3617.9, 300 sec: 3568.3). Total num frames: 2396160. Throughput: 0: 930.1. Samples: 598468. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:47:11,168][14251] Avg episode reward: [(0, '21.166')] +[2023-02-25 17:47:11,179][14374] Saving new best policy, reward=21.166! +[2023-02-25 17:47:16,157][14251] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 2412544. Throughput: 0: 886.0. Samples: 602864. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:47:16,161][14251] Avg episode reward: [(0, '21.507')] +[2023-02-25 17:47:16,163][14374] Saving new best policy, reward=21.507! +[2023-02-25 17:47:17,541][14388] Updated weights for policy 0, policy_version 590 (0.0015) +[2023-02-25 17:47:21,157][14251] Fps is (10 sec: 3278.3, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2428928. Throughput: 0: 881.9. Samples: 607960. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:47:21,162][14251] Avg episode reward: [(0, '22.960')] +[2023-02-25 17:47:21,177][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000593_2428928.pth... +[2023-02-25 17:47:21,289][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000386_1581056.pth +[2023-02-25 17:47:21,301][14374] Saving new best policy, reward=22.960! +[2023-02-25 17:47:26,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2449408. Throughput: 0: 901.4. Samples: 610986. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:47:26,166][14251] Avg episode reward: [(0, '24.418')] +[2023-02-25 17:47:26,172][14374] Saving new best policy, reward=24.418! +[2023-02-25 17:47:27,808][14388] Updated weights for policy 0, policy_version 600 (0.0015) +[2023-02-25 17:47:31,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 2465792. Throughput: 0: 908.0. Samples: 616840. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:47:31,163][14251] Avg episode reward: [(0, '24.162')] +[2023-02-25 17:47:36,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3550.1, 300 sec: 3568.4). Total num frames: 2482176. Throughput: 0: 856.1. Samples: 621034. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:47:36,163][14251] Avg episode reward: [(0, '23.804')] +[2023-02-25 17:47:40,224][14388] Updated weights for policy 0, policy_version 610 (0.0017) +[2023-02-25 17:47:41,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 2498560. Throughput: 0: 873.8. Samples: 623904. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:47:41,165][14251] Avg episode reward: [(0, '22.629')] +[2023-02-25 17:47:46,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 2523136. Throughput: 0: 916.5. Samples: 630222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:47:46,159][14251] Avg episode reward: [(0, '23.421')] +[2023-02-25 17:47:51,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 2535424. Throughput: 0: 886.5. Samples: 635228. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:47:51,163][14251] Avg episode reward: [(0, '22.718')] +[2023-02-25 17:47:51,392][14388] Updated weights for policy 0, policy_version 620 (0.0015) +[2023-02-25 17:47:56,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 2551808. Throughput: 0: 862.5. Samples: 637276. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 17:47:56,163][14251] Avg episode reward: [(0, '21.713')] +[2023-02-25 17:48:01,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 2572288. Throughput: 0: 897.6. Samples: 643258. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:48:01,160][14251] Avg episode reward: [(0, '23.239')] +[2023-02-25 17:48:02,177][14388] Updated weights for policy 0, policy_version 630 (0.0023) +[2023-02-25 17:48:06,157][14251] Fps is (10 sec: 4095.8, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 2592768. Throughput: 0: 923.1. Samples: 649500. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:48:06,163][14251] Avg episode reward: [(0, '23.865')] +[2023-02-25 17:48:11,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3550.1, 300 sec: 3568.4). Total num frames: 2609152. Throughput: 0: 901.7. Samples: 651562. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:48:11,161][14251] Avg episode reward: [(0, '22.774')] +[2023-02-25 17:48:14,634][14388] Updated weights for policy 0, policy_version 640 (0.0022) +[2023-02-25 17:48:16,157][14251] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 2625536. Throughput: 0: 873.3. Samples: 656140. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:48:16,160][14251] Avg episode reward: [(0, '22.029')] +[2023-02-25 17:48:21,157][14251] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2646016. Throughput: 0: 923.6. Samples: 662598. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:48:21,160][14251] Avg episode reward: [(0, '23.788')] +[2023-02-25 17:48:24,530][14388] Updated weights for policy 0, policy_version 650 (0.0011) +[2023-02-25 17:48:26,157][14251] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 2666496. Throughput: 0: 928.4. Samples: 665682. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:48:26,160][14251] Avg episode reward: [(0, '24.293')] +[2023-02-25 17:48:31,157][14251] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 2678784. Throughput: 0: 882.6. Samples: 669938. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:48:31,160][14251] Avg episode reward: [(0, '24.282')] +[2023-02-25 17:48:36,157][14251] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2699264. Throughput: 0: 896.0. Samples: 675546. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:48:36,164][14251] Avg episode reward: [(0, '23.928')] +[2023-02-25 17:48:36,631][14388] Updated weights for policy 0, policy_version 660 (0.0013) +[2023-02-25 17:48:41,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 2719744. Throughput: 0: 922.1. Samples: 678770. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:48:41,160][14251] Avg episode reward: [(0, '24.451')] +[2023-02-25 17:48:41,177][14374] Saving new best policy, reward=24.451! +[2023-02-25 17:48:46,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 2736128. Throughput: 0: 909.2. Samples: 684170. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:48:46,163][14251] Avg episode reward: [(0, '24.430')] +[2023-02-25 17:48:48,477][14388] Updated weights for policy 0, policy_version 670 (0.0015) +[2023-02-25 17:48:51,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 2748416. Throughput: 0: 865.9. Samples: 688466. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:48:51,160][14251] Avg episode reward: [(0, '23.306')] +[2023-02-25 17:48:56,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 2772992. Throughput: 0: 889.4. Samples: 691586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:48:56,160][14251] Avg episode reward: [(0, '22.555')] +[2023-02-25 17:48:58,819][14388] Updated weights for policy 0, policy_version 680 (0.0016) +[2023-02-25 17:49:01,157][14251] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 2793472. Throughput: 0: 929.1. Samples: 697950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:49:01,159][14251] Avg episode reward: [(0, '21.172')] +[2023-02-25 17:49:06,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 2805760. Throughput: 0: 887.4. Samples: 702530. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:49:06,162][14251] Avg episode reward: [(0, '21.476')] +[2023-02-25 17:49:11,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 2822144. Throughput: 0: 870.8. Samples: 704868. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:49:11,163][14251] Avg episode reward: [(0, '21.337')] +[2023-02-25 17:49:11,232][14388] Updated weights for policy 0, policy_version 690 (0.0019) +[2023-02-25 17:49:16,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 2846720. Throughput: 0: 914.7. Samples: 711100. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:49:16,164][14251] Avg episode reward: [(0, '20.706')] +[2023-02-25 17:49:21,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2863104. Throughput: 0: 921.3. Samples: 717006. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:49:21,164][14251] Avg episode reward: [(0, '21.253')] +[2023-02-25 17:49:21,176][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000699_2863104.pth... +[2023-02-25 17:49:21,308][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000491_2011136.pth +[2023-02-25 17:49:21,467][14388] Updated weights for policy 0, policy_version 700 (0.0016) +[2023-02-25 17:49:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 2879488. Throughput: 0: 894.4. Samples: 719020. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:49:26,161][14251] Avg episode reward: [(0, '21.713')] +[2023-02-25 17:49:31,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2895872. Throughput: 0: 885.5. Samples: 724016. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:49:31,159][14251] Avg episode reward: [(0, '21.810')] +[2023-02-25 17:49:33,256][14388] Updated weights for policy 0, policy_version 710 (0.0015) +[2023-02-25 17:49:36,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 2916352. Throughput: 0: 934.9. Samples: 730538. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:49:36,165][14251] Avg episode reward: [(0, '21.947')] +[2023-02-25 17:49:41,160][14251] Fps is (10 sec: 4094.8, 60 sec: 3618.0, 300 sec: 3568.4). Total num frames: 2936832. Throughput: 0: 930.8. Samples: 733476. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:49:41,165][14251] Avg episode reward: [(0, '23.306')] +[2023-02-25 17:49:44,903][14388] Updated weights for policy 0, policy_version 720 (0.0017) +[2023-02-25 17:49:46,159][14251] Fps is (10 sec: 3276.1, 60 sec: 3549.7, 300 sec: 3554.5). Total num frames: 2949120. Throughput: 0: 884.3. Samples: 737744. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:49:46,162][14251] Avg episode reward: [(0, '23.848')] +[2023-02-25 17:49:51,157][14251] Fps is (10 sec: 3277.8, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 2969600. Throughput: 0: 909.2. Samples: 743444. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:49:51,163][14251] Avg episode reward: [(0, '24.182')] +[2023-02-25 17:49:55,277][14388] Updated weights for policy 0, policy_version 730 (0.0016) +[2023-02-25 17:49:56,157][14251] Fps is (10 sec: 4096.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2990080. Throughput: 0: 927.4. Samples: 746602. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:49:56,165][14251] Avg episode reward: [(0, '25.609')] +[2023-02-25 17:49:56,171][14374] Saving new best policy, reward=25.609! +[2023-02-25 17:50:01,158][14251] Fps is (10 sec: 3686.2, 60 sec: 3549.8, 300 sec: 3568.4). Total num frames: 3006464. Throughput: 0: 907.3. Samples: 751928. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:50:01,160][14251] Avg episode reward: [(0, '25.567')] +[2023-02-25 17:50:06,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 3022848. Throughput: 0: 873.2. Samples: 756300. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:50:06,159][14251] Avg episode reward: [(0, '26.618')] +[2023-02-25 17:50:06,166][14374] Saving new best policy, reward=26.618! +[2023-02-25 17:50:07,938][14388] Updated weights for policy 0, policy_version 740 (0.0017) +[2023-02-25 17:50:11,157][14251] Fps is (10 sec: 3686.6, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 3043328. Throughput: 0: 898.7. Samples: 759462. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:50:11,160][14251] Avg episode reward: [(0, '26.046')] +[2023-02-25 17:50:16,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 3063808. Throughput: 0: 929.6. Samples: 765846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:50:16,162][14251] Avg episode reward: [(0, '25.419')] +[2023-02-25 17:50:18,280][14388] Updated weights for policy 0, policy_version 750 (0.0015) +[2023-02-25 17:50:21,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 3076096. Throughput: 0: 884.0. Samples: 770318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:50:21,164][14251] Avg episode reward: [(0, '26.137')] +[2023-02-25 17:50:26,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 3096576. Throughput: 0: 868.1. Samples: 772536. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:50:26,162][14251] Avg episode reward: [(0, '25.944')] +[2023-02-25 17:50:29,686][14388] Updated weights for policy 0, policy_version 760 (0.0014) +[2023-02-25 17:50:31,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 3117056. Throughput: 0: 918.2. Samples: 779060. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:50:31,161][14251] Avg episode reward: [(0, '24.846')] +[2023-02-25 17:50:36,158][14251] Fps is (10 sec: 4095.7, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 3137536. Throughput: 0: 920.6. Samples: 784870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:50:36,161][14251] Avg episode reward: [(0, '22.818')] +[2023-02-25 17:50:41,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3554.5). Total num frames: 3149824. Throughput: 0: 899.5. Samples: 787078. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:50:41,160][14251] Avg episode reward: [(0, '22.553')] +[2023-02-25 17:50:41,385][14388] Updated weights for policy 0, policy_version 770 (0.0028) +[2023-02-25 17:50:46,157][14251] Fps is (10 sec: 3277.1, 60 sec: 3686.5, 300 sec: 3568.4). Total num frames: 3170304. Throughput: 0: 896.7. Samples: 792280. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:50:46,162][14251] Avg episode reward: [(0, '21.913')] +[2023-02-25 17:50:51,157][14251] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 3190784. Throughput: 0: 942.7. Samples: 798720. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:50:51,165][14251] Avg episode reward: [(0, '20.227')] +[2023-02-25 17:50:51,514][14388] Updated weights for policy 0, policy_version 780 (0.0016) +[2023-02-25 17:50:56,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 3207168. Throughput: 0: 931.4. Samples: 801376. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:50:56,165][14251] Avg episode reward: [(0, '20.261')] +[2023-02-25 17:51:01,158][14251] Fps is (10 sec: 3276.6, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 3223552. Throughput: 0: 882.8. Samples: 805574. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:51:01,160][14251] Avg episode reward: [(0, '21.418')] +[2023-02-25 17:51:03,944][14388] Updated weights for policy 0, policy_version 790 (0.0030) +[2023-02-25 17:51:06,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3244032. Throughput: 0: 919.4. Samples: 811692. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:51:06,164][14251] Avg episode reward: [(0, '21.115')] +[2023-02-25 17:51:11,157][14251] Fps is (10 sec: 4096.4, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3264512. Throughput: 0: 942.4. Samples: 814946. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:51:11,160][14251] Avg episode reward: [(0, '21.697')] +[2023-02-25 17:51:14,580][14388] Updated weights for policy 0, policy_version 800 (0.0018) +[2023-02-25 17:51:16,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3623.9). Total num frames: 3280896. Throughput: 0: 909.7. Samples: 819998. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:51:16,159][14251] Avg episode reward: [(0, '21.380')] +[2023-02-25 17:51:21,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3297280. Throughput: 0: 888.3. Samples: 824844. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:51:21,166][14251] Avg episode reward: [(0, '20.763')] +[2023-02-25 17:51:21,181][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000805_3297280.pth... +[2023-02-25 17:51:21,316][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000593_2428928.pth +[2023-02-25 17:51:25,794][14388] Updated weights for policy 0, policy_version 810 (0.0023) +[2023-02-25 17:51:26,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3317760. Throughput: 0: 911.3. Samples: 828086. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:51:26,159][14251] Avg episode reward: [(0, '20.950')] +[2023-02-25 17:51:31,161][14251] Fps is (10 sec: 4094.5, 60 sec: 3686.2, 300 sec: 3623.9). Total num frames: 3338240. Throughput: 0: 934.6. Samples: 834342. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:51:31,164][14251] Avg episode reward: [(0, '20.732')] +[2023-02-25 17:51:36,160][14251] Fps is (10 sec: 3275.8, 60 sec: 3549.7, 300 sec: 3610.0). Total num frames: 3350528. Throughput: 0: 873.8. Samples: 838042. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:51:36,169][14251] Avg episode reward: [(0, '20.479')] +[2023-02-25 17:51:39,802][14388] Updated weights for policy 0, policy_version 820 (0.0014) +[2023-02-25 17:51:41,159][14251] Fps is (10 sec: 2048.3, 60 sec: 3481.5, 300 sec: 3568.4). Total num frames: 3358720. Throughput: 0: 854.0. Samples: 839808. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0) +[2023-02-25 17:51:41,166][14251] Avg episode reward: [(0, '19.972')] +[2023-02-25 17:51:46,157][14251] Fps is (10 sec: 2868.0, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 3379200. Throughput: 0: 864.8. Samples: 844488. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:51:46,166][14251] Avg episode reward: [(0, '21.026')] +[2023-02-25 17:51:50,716][14388] Updated weights for policy 0, policy_version 830 (0.0018) +[2023-02-25 17:51:51,159][14251] Fps is (10 sec: 4096.0, 60 sec: 3481.5, 300 sec: 3596.1). Total num frames: 3399680. Throughput: 0: 866.5. Samples: 850684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:51:51,164][14251] Avg episode reward: [(0, '21.249')] +[2023-02-25 17:51:56,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3582.3). Total num frames: 3411968. Throughput: 0: 839.9. Samples: 852742. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:51:56,163][14251] Avg episode reward: [(0, '22.230')] +[2023-02-25 17:52:01,157][14251] Fps is (10 sec: 3277.4, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 3432448. Throughput: 0: 834.0. Samples: 857526. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:52:01,163][14251] Avg episode reward: [(0, '24.627')] +[2023-02-25 17:52:02,849][14388] Updated weights for policy 0, policy_version 840 (0.0017) +[2023-02-25 17:52:06,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3582.3). Total num frames: 3452928. Throughput: 0: 871.9. Samples: 864080. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:52:06,159][14251] Avg episode reward: [(0, '25.089')] +[2023-02-25 17:52:11,163][14251] Fps is (10 sec: 4093.8, 60 sec: 3481.3, 300 sec: 3596.1). Total num frames: 3473408. Throughput: 0: 870.4. Samples: 867258. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:52:11,166][14251] Avg episode reward: [(0, '24.474')] +[2023-02-25 17:52:13,957][14388] Updated weights for policy 0, policy_version 850 (0.0015) +[2023-02-25 17:52:16,157][14251] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3582.3). Total num frames: 3485696. Throughput: 0: 827.5. Samples: 871576. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:52:16,165][14251] Avg episode reward: [(0, '24.907')] +[2023-02-25 17:52:21,157][14251] Fps is (10 sec: 3278.6, 60 sec: 3481.6, 300 sec: 3582.3). Total num frames: 3506176. Throughput: 0: 872.0. Samples: 877280. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:52:21,160][14251] Avg episode reward: [(0, '25.065')] +[2023-02-25 17:52:24,470][14388] Updated weights for policy 0, policy_version 860 (0.0014) +[2023-02-25 17:52:26,157][14251] Fps is (10 sec: 4096.1, 60 sec: 3481.6, 300 sec: 3596.1). Total num frames: 3526656. Throughput: 0: 906.2. Samples: 880584. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:52:26,160][14251] Avg episode reward: [(0, '22.430')] +[2023-02-25 17:52:31,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3413.5, 300 sec: 3596.1). Total num frames: 3543040. Throughput: 0: 922.3. Samples: 885992. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:52:31,166][14251] Avg episode reward: [(0, '22.508')] +[2023-02-25 17:52:36,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3481.8, 300 sec: 3596.1). Total num frames: 3559424. Throughput: 0: 887.0. Samples: 890596. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:52:36,160][14251] Avg episode reward: [(0, '22.554')] +[2023-02-25 17:52:36,624][14388] Updated weights for policy 0, policy_version 870 (0.0023) +[2023-02-25 17:52:41,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3582.3). Total num frames: 3579904. Throughput: 0: 913.9. Samples: 893868. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:52:41,163][14251] Avg episode reward: [(0, '23.469')] +[2023-02-25 17:52:46,157][14251] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3600384. Throughput: 0: 949.9. Samples: 900272. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:52:46,162][14251] Avg episode reward: [(0, '22.883')] +[2023-02-25 17:52:46,854][14388] Updated weights for policy 0, policy_version 880 (0.0012) +[2023-02-25 17:52:51,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3610.0). Total num frames: 3616768. Throughput: 0: 899.0. Samples: 904536. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:52:51,164][14251] Avg episode reward: [(0, '23.736')] +[2023-02-25 17:52:56,157][14251] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 3633152. Throughput: 0: 880.6. Samples: 906880. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:52:56,165][14251] Avg episode reward: [(0, '24.588')] +[2023-02-25 17:52:58,838][14388] Updated weights for policy 0, policy_version 890 (0.0025) +[2023-02-25 17:53:01,157][14251] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 3653632. Throughput: 0: 925.4. Samples: 913218. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:01,165][14251] Avg episode reward: [(0, '24.548')] +[2023-02-25 17:53:06,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3674112. Throughput: 0: 924.8. Samples: 918898. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:53:06,160][14251] Avg episode reward: [(0, '23.676')] +[2023-02-25 17:53:10,381][14388] Updated weights for policy 0, policy_version 900 (0.0016) +[2023-02-25 17:53:11,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3550.2, 300 sec: 3596.1). Total num frames: 3686400. Throughput: 0: 900.3. Samples: 921098. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:11,161][14251] Avg episode reward: [(0, '23.601')] +[2023-02-25 17:53:16,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 3706880. Throughput: 0: 897.3. Samples: 926372. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:53:16,159][14251] Avg episode reward: [(0, '22.888')] +[2023-02-25 17:53:20,432][14388] Updated weights for policy 0, policy_version 910 (0.0023) +[2023-02-25 17:53:21,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 3727360. Throughput: 0: 941.3. Samples: 932954. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:21,160][14251] Avg episode reward: [(0, '23.604')] +[2023-02-25 17:53:21,169][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000910_3727360.pth... +[2023-02-25 17:53:21,275][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000699_2863104.pth +[2023-02-25 17:53:26,159][14251] Fps is (10 sec: 3685.6, 60 sec: 3618.0, 300 sec: 3610.0). Total num frames: 3743744. Throughput: 0: 925.6. Samples: 935522. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:26,171][14251] Avg episode reward: [(0, '24.355')] +[2023-02-25 17:53:31,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 3760128. Throughput: 0: 880.3. Samples: 939884. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-02-25 17:53:31,166][14251] Avg episode reward: [(0, '24.158')] +[2023-02-25 17:53:32,738][14388] Updated weights for policy 0, policy_version 920 (0.0017) +[2023-02-25 17:53:36,157][14251] Fps is (10 sec: 3687.2, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 3780608. Throughput: 0: 918.8. Samples: 945880. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:36,164][14251] Avg episode reward: [(0, '24.198')] +[2023-02-25 17:53:41,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3801088. Throughput: 0: 937.0. Samples: 949044. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:53:41,160][14251] Avg episode reward: [(0, '24.438')] +[2023-02-25 17:53:42,828][14388] Updated weights for policy 0, policy_version 930 (0.0014) +[2023-02-25 17:53:46,160][14251] Fps is (10 sec: 3685.3, 60 sec: 3618.0, 300 sec: 3623.9). Total num frames: 3817472. Throughput: 0: 909.5. Samples: 954150. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:53:46,163][14251] Avg episode reward: [(0, '24.113')] +[2023-02-25 17:53:51,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 3833856. Throughput: 0: 889.5. Samples: 958926. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:51,166][14251] Avg episode reward: [(0, '23.471')] +[2023-02-25 17:53:54,785][14388] Updated weights for policy 0, policy_version 940 (0.0014) +[2023-02-25 17:53:56,157][14251] Fps is (10 sec: 3687.5, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 3854336. Throughput: 0: 910.7. Samples: 962080. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:53:56,163][14251] Avg episode reward: [(0, '22.773')] +[2023-02-25 17:54:01,157][14251] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 3870720. Throughput: 0: 931.7. Samples: 968298. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:54:01,163][14251] Avg episode reward: [(0, '22.744')] +[2023-02-25 17:54:06,157][14251] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3610.0). Total num frames: 3887104. Throughput: 0: 880.9. Samples: 972594. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:54:06,159][14251] Avg episode reward: [(0, '22.732')] +[2023-02-25 17:54:07,003][14388] Updated weights for policy 0, policy_version 950 (0.0022) +[2023-02-25 17:54:11,157][14251] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 3907584. Throughput: 0: 876.6. Samples: 974966. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:54:11,165][14251] Avg episode reward: [(0, '23.535')] +[2023-02-25 17:54:16,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3928064. Throughput: 0: 923.6. Samples: 981444. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-25 17:54:16,160][14251] Avg episode reward: [(0, '23.066')] +[2023-02-25 17:54:16,994][14388] Updated weights for policy 0, policy_version 960 (0.0012) +[2023-02-25 17:54:21,157][14251] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 3944448. Throughput: 0: 912.3. Samples: 986932. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:54:21,164][14251] Avg episode reward: [(0, '24.091')] +[2023-02-25 17:54:26,157][14251] Fps is (10 sec: 2867.2, 60 sec: 3550.0, 300 sec: 3596.1). Total num frames: 3956736. Throughput: 0: 886.5. Samples: 988936. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-25 17:54:26,162][14251] Avg episode reward: [(0, '24.839')] +[2023-02-25 17:54:29,635][14388] Updated weights for policy 0, policy_version 970 (0.0027) +[2023-02-25 17:54:31,157][14251] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 3977216. Throughput: 0: 891.7. Samples: 994274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-25 17:54:31,165][14251] Avg episode reward: [(0, '24.048')] +[2023-02-25 17:54:36,157][14251] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 3997696. Throughput: 0: 929.6. Samples: 1000760. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-25 17:54:36,166][14251] Avg episode reward: [(0, '25.056')] +[2023-02-25 17:54:37,075][14374] Stopping Batcher_0... +[2023-02-25 17:54:37,077][14374] Loop batcher_evt_loop terminating... +[2023-02-25 17:54:37,078][14251] Component Batcher_0 stopped! +[2023-02-25 17:54:37,084][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 17:54:37,085][14251] Component RolloutWorker_w3 process died already! Don't wait for it. +[2023-02-25 17:54:37,090][14251] Component RolloutWorker_w5 process died already! Don't wait for it. +[2023-02-25 17:54:37,144][14388] Weights refcount: 2 0 +[2023-02-25 17:54:37,142][14399] Stopping RolloutWorker_w7... +[2023-02-25 17:54:37,144][14399] Loop rollout_proc7_evt_loop terminating... +[2023-02-25 17:54:37,146][14388] Stopping InferenceWorker_p0-w0... +[2023-02-25 17:54:37,143][14251] Component RolloutWorker_w7 stopped! +[2023-02-25 17:54:37,155][14388] Loop inference_proc0-0_evt_loop terminating... +[2023-02-25 17:54:37,156][14251] Component InferenceWorker_p0-w0 stopped! +[2023-02-25 17:54:37,169][14390] Stopping RolloutWorker_w1... +[2023-02-25 17:54:37,169][14251] Component RolloutWorker_w1 stopped! +[2023-02-25 17:54:37,171][14390] Loop rollout_proc1_evt_loop terminating... +[2023-02-25 17:54:37,191][14251] Component RolloutWorker_w6 stopped! +[2023-02-25 17:54:37,198][14400] Stopping RolloutWorker_w6... +[2023-02-25 17:54:37,198][14400] Loop rollout_proc6_evt_loop terminating... +[2023-02-25 17:54:37,200][14251] Component RolloutWorker_w0 stopped! +[2023-02-25 17:54:37,206][14389] Stopping RolloutWorker_w0... +[2023-02-25 17:54:37,207][14389] Loop rollout_proc0_evt_loop terminating... +[2023-02-25 17:54:37,215][14251] Component RolloutWorker_w4 stopped! +[2023-02-25 17:54:37,221][14396] Stopping RolloutWorker_w4... +[2023-02-25 17:54:37,222][14396] Loop rollout_proc4_evt_loop terminating... +[2023-02-25 17:54:37,236][14251] Component RolloutWorker_w2 stopped! +[2023-02-25 17:54:37,242][14395] Stopping RolloutWorker_w2... +[2023-02-25 17:54:37,243][14395] Loop rollout_proc2_evt_loop terminating... +[2023-02-25 17:54:37,307][14374] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000805_3297280.pth +[2023-02-25 17:54:37,321][14374] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 17:54:37,552][14251] Component LearnerWorker_p0 stopped! +[2023-02-25 17:54:37,557][14251] Waiting for process learner_proc0 to stop... +[2023-02-25 17:54:37,565][14374] Stopping LearnerWorker_p0... +[2023-02-25 17:54:37,566][14374] Loop learner_proc0_evt_loop terminating... +[2023-02-25 17:54:39,732][14251] Waiting for process inference_proc0-0 to join... +[2023-02-25 17:54:40,509][14251] Waiting for process rollout_proc0 to join... +[2023-02-25 17:54:40,593][14251] Waiting for process rollout_proc1 to join... +[2023-02-25 17:54:40,888][14251] Waiting for process rollout_proc2 to join... +[2023-02-25 17:54:40,893][14251] Waiting for process rollout_proc3 to join... +[2023-02-25 17:54:40,895][14251] Waiting for process rollout_proc4 to join... +[2023-02-25 17:54:40,898][14251] Waiting for process rollout_proc5 to join... +[2023-02-25 17:54:40,899][14251] Waiting for process rollout_proc6 to join... +[2023-02-25 17:54:40,900][14251] Waiting for process rollout_proc7 to join... +[2023-02-25 17:54:40,904][14251] Batcher 0 profile tree view: +batching: 24.0045, releasing_batches: 0.0230 +[2023-02-25 17:54:40,906][14251] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0058 + wait_policy_total: 513.2130 +update_model: 9.0858 + weight_update: 0.0013 +one_step: 0.0032 + handle_policy_step: 561.8960 + deserialize: 16.0145, stack: 3.5402, obs_to_device_normalize: 128.4644, forward: 274.9016, send_messages: 24.1774 + prepare_outputs: 86.8608 + to_cpu: 54.2174 +[2023-02-25 17:54:40,907][14251] Learner 0 profile tree view: +misc: 0.0061, prepare_batch: 15.3394 +train: 73.0352 + epoch_init: 0.0056, minibatch_init: 0.0064, losses_postprocess: 0.5665, kl_divergence: 0.5958, after_optimizer: 32.3123 + calculate_losses: 25.5657 + losses_init: 0.0035, forward_head: 1.7781, bptt_initial: 17.2020, tail: 0.9289, advantages_returns: 0.2295, losses: 3.0318 + bptt: 2.1084 + bptt_forward_core: 2.0380 + update: 13.4033 + clip: 1.3951 +[2023-02-25 17:54:40,910][14251] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3565, enqueue_policy_requests: 121.8768, env_step: 880.2285, overhead: 23.6758, complete_rollouts: 8.7697 +save_policy_outputs: 23.4027 + split_output_tensors: 11.5754 +[2023-02-25 17:54:40,911][14251] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4465, enqueue_policy_requests: 257.9998, env_step: 735.2262, overhead: 27.3052, complete_rollouts: 5.1001 +save_policy_outputs: 24.4110 + split_output_tensors: 11.9177 +[2023-02-25 17:54:40,913][14251] Loop Runner_EvtLoop terminating... +[2023-02-25 17:54:40,916][14251] Runner profile tree view: +main_loop: 1156.3895 +[2023-02-25 17:54:40,918][14251] Collected {0: 4005888}, FPS: 3464.1 +[2023-02-25 17:54:41,068][14251] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-25 17:54:41,072][14251] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-25 17:54:41,074][14251] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-25 17:54:41,079][14251] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-25 17:54:41,081][14251] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-25 17:54:41,086][14251] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-25 17:54:41,087][14251] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-25 17:54:41,088][14251] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-25 17:54:41,090][14251] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-25 17:54:41,092][14251] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-25 17:54:41,094][14251] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-25 17:54:41,096][14251] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-25 17:54:41,098][14251] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-25 17:54:41,099][14251] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-25 17:54:41,103][14251] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-25 17:54:41,134][14251] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 17:54:41,139][14251] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 17:54:41,143][14251] RunningMeanStd input shape: (1,) +[2023-02-25 17:54:41,166][14251] ConvEncoder: input_channels=3 +[2023-02-25 17:54:41,941][14251] Conv encoder output size: 512 +[2023-02-25 17:54:41,945][14251] Policy head output size: 512 +[2023-02-25 17:54:44,423][14251] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 17:54:45,681][14251] Num frames 100... +[2023-02-25 17:54:45,795][14251] Num frames 200... +[2023-02-25 17:54:45,908][14251] Num frames 300... +[2023-02-25 17:54:46,025][14251] Num frames 400... +[2023-02-25 17:54:46,145][14251] Num frames 500... +[2023-02-25 17:54:46,263][14251] Num frames 600... +[2023-02-25 17:54:46,385][14251] Num frames 700... +[2023-02-25 17:54:46,525][14251] Avg episode rewards: #0: 14.680, true rewards: #0: 7.680 +[2023-02-25 17:54:46,527][14251] Avg episode reward: 14.680, avg true_objective: 7.680 +[2023-02-25 17:54:46,565][14251] Num frames 800... +[2023-02-25 17:54:46,680][14251] Num frames 900... +[2023-02-25 17:54:46,796][14251] Num frames 1000... +[2023-02-25 17:54:46,913][14251] Num frames 1100... +[2023-02-25 17:54:47,026][14251] Num frames 1200... +[2023-02-25 17:54:47,142][14251] Num frames 1300... +[2023-02-25 17:54:47,259][14251] Num frames 1400... +[2023-02-25 17:54:47,378][14251] Num frames 1500... +[2023-02-25 17:54:47,496][14251] Num frames 1600... +[2023-02-25 17:54:47,613][14251] Num frames 1700... +[2023-02-25 17:54:47,726][14251] Num frames 1800... +[2023-02-25 17:54:47,842][14251] Num frames 1900... +[2023-02-25 17:54:47,955][14251] Avg episode rewards: #0: 21.260, true rewards: #0: 9.760 +[2023-02-25 17:54:47,957][14251] Avg episode reward: 21.260, avg true_objective: 9.760 +[2023-02-25 17:54:48,010][14251] Num frames 2000... +[2023-02-25 17:54:48,126][14251] Num frames 2100... +[2023-02-25 17:54:48,244][14251] Num frames 2200... +[2023-02-25 17:54:48,361][14251] Num frames 2300... +[2023-02-25 17:54:48,478][14251] Num frames 2400... +[2023-02-25 17:54:48,594][14251] Num frames 2500... +[2023-02-25 17:54:48,712][14251] Num frames 2600... +[2023-02-25 17:54:48,867][14251] Avg episode rewards: #0: 18.627, true rewards: #0: 8.960 +[2023-02-25 17:54:48,869][14251] Avg episode reward: 18.627, avg true_objective: 8.960 +[2023-02-25 17:54:48,886][14251] Num frames 2700... +[2023-02-25 17:54:49,000][14251] Num frames 2800... +[2023-02-25 17:54:49,116][14251] Num frames 2900... +[2023-02-25 17:54:49,232][14251] Num frames 3000... +[2023-02-25 17:54:49,352][14251] Num frames 3100... +[2023-02-25 17:54:49,466][14251] Num frames 3200... +[2023-02-25 17:54:49,583][14251] Num frames 3300... +[2023-02-25 17:54:49,711][14251] Num frames 3400... +[2023-02-25 17:54:49,846][14251] Num frames 3500... +[2023-02-25 17:54:49,955][14251] Num frames 3600... +[2023-02-25 17:54:50,072][14251] Avg episode rewards: #0: 18.620, true rewards: #0: 9.120 +[2023-02-25 17:54:50,073][14251] Avg episode reward: 18.620, avg true_objective: 9.120 +[2023-02-25 17:54:50,136][14251] Num frames 3700... +[2023-02-25 17:54:50,255][14251] Num frames 3800... +[2023-02-25 17:54:50,370][14251] Num frames 3900... +[2023-02-25 17:54:50,488][14251] Num frames 4000... +[2023-02-25 17:54:50,624][14251] Num frames 4100... +[2023-02-25 17:54:50,748][14251] Num frames 4200... +[2023-02-25 17:54:50,834][14251] Avg episode rewards: #0: 16.648, true rewards: #0: 8.448 +[2023-02-25 17:54:50,836][14251] Avg episode reward: 16.648, avg true_objective: 8.448 +[2023-02-25 17:54:50,929][14251] Num frames 4300... +[2023-02-25 17:54:51,042][14251] Num frames 4400... +[2023-02-25 17:54:51,162][14251] Num frames 4500... +[2023-02-25 17:54:51,283][14251] Num frames 4600... +[2023-02-25 17:54:51,400][14251] Num frames 4700... +[2023-02-25 17:54:51,518][14251] Num frames 4800... +[2023-02-25 17:54:51,638][14251] Num frames 4900... +[2023-02-25 17:54:51,757][14251] Num frames 5000... +[2023-02-25 17:54:51,867][14251] Num frames 5100... +[2023-02-25 17:54:51,985][14251] Num frames 5200... +[2023-02-25 17:54:52,106][14251] Num frames 5300... +[2023-02-25 17:54:52,226][14251] Num frames 5400... +[2023-02-25 17:54:52,345][14251] Num frames 5500... +[2023-02-25 17:54:52,458][14251] Num frames 5600... +[2023-02-25 17:54:52,662][14251] Avg episode rewards: #0: 20.147, true rewards: #0: 9.480 +[2023-02-25 17:54:52,664][14251] Avg episode reward: 20.147, avg true_objective: 9.480 +[2023-02-25 17:54:52,689][14251] Num frames 5700... +[2023-02-25 17:54:52,842][14251] Num frames 5800... +[2023-02-25 17:54:52,998][14251] Num frames 5900... +[2023-02-25 17:54:53,167][14251] Num frames 6000... +[2023-02-25 17:54:53,326][14251] Num frames 6100... +[2023-02-25 17:54:53,487][14251] Num frames 6200... +[2023-02-25 17:54:53,647][14251] Num frames 6300... +[2023-02-25 17:54:53,811][14251] Num frames 6400... +[2023-02-25 17:54:53,966][14251] Num frames 6500... +[2023-02-25 17:54:54,150][14251] Avg episode rewards: #0: 19.834, true rewards: #0: 9.406 +[2023-02-25 17:54:54,152][14251] Avg episode reward: 19.834, avg true_objective: 9.406 +[2023-02-25 17:54:54,182][14251] Num frames 6600... +[2023-02-25 17:54:54,357][14251] Num frames 6700... +[2023-02-25 17:54:54,521][14251] Num frames 6800... +[2023-02-25 17:54:54,682][14251] Num frames 6900... +[2023-02-25 17:54:54,840][14251] Num frames 7000... +[2023-02-25 17:54:55,000][14251] Num frames 7100... +[2023-02-25 17:54:55,169][14251] Num frames 7200... +[2023-02-25 17:54:55,328][14251] Num frames 7300... +[2023-02-25 17:54:55,509][14251] Num frames 7400... +[2023-02-25 17:54:55,683][14251] Num frames 7500... +[2023-02-25 17:54:55,865][14251] Avg episode rewards: #0: 20.346, true rewards: #0: 9.471 +[2023-02-25 17:54:55,867][14251] Avg episode reward: 20.346, avg true_objective: 9.471 +[2023-02-25 17:54:55,903][14251] Num frames 7600... +[2023-02-25 17:54:56,063][14251] Num frames 7700... +[2023-02-25 17:54:56,186][14251] Num frames 7800... +[2023-02-25 17:54:56,305][14251] Num frames 7900... +[2023-02-25 17:54:56,431][14251] Num frames 8000... +[2023-02-25 17:54:56,551][14251] Num frames 8100... +[2023-02-25 17:54:56,678][14251] Num frames 8200... +[2023-02-25 17:54:56,796][14251] Num frames 8300... +[2023-02-25 17:54:56,920][14251] Num frames 8400... +[2023-02-25 17:54:57,038][14251] Num frames 8500... +[2023-02-25 17:54:57,148][14251] Num frames 8600... +[2023-02-25 17:54:57,280][14251] Avg episode rewards: #0: 21.072, true rewards: #0: 9.628 +[2023-02-25 17:54:57,281][14251] Avg episode reward: 21.072, avg true_objective: 9.628 +[2023-02-25 17:54:57,329][14251] Num frames 8700... +[2023-02-25 17:54:57,451][14251] Num frames 8800... +[2023-02-25 17:54:57,566][14251] Num frames 8900... +[2023-02-25 17:54:57,678][14251] Num frames 9000... +[2023-02-25 17:54:57,803][14251] Num frames 9100... +[2023-02-25 17:54:57,925][14251] Num frames 9200... +[2023-02-25 17:54:58,033][14251] Num frames 9300... +[2023-02-25 17:54:58,150][14251] Num frames 9400... +[2023-02-25 17:54:58,266][14251] Num frames 9500... +[2023-02-25 17:54:58,383][14251] Num frames 9600... +[2023-02-25 17:54:58,505][14251] Num frames 9700... +[2023-02-25 17:54:58,618][14251] Num frames 9800... +[2023-02-25 17:54:58,736][14251] Num frames 9900... +[2023-02-25 17:54:58,857][14251] Num frames 10000... +[2023-02-25 17:54:58,974][14251] Num frames 10100... +[2023-02-25 17:54:59,090][14251] Num frames 10200... +[2023-02-25 17:54:59,201][14251] Num frames 10300... +[2023-02-25 17:54:59,321][14251] Num frames 10400... +[2023-02-25 17:54:59,440][14251] Num frames 10500... +[2023-02-25 17:54:59,563][14251] Num frames 10600... +[2023-02-25 17:54:59,676][14251] Num frames 10700... +[2023-02-25 17:54:59,804][14251] Avg episode rewards: #0: 24.065, true rewards: #0: 10.765 +[2023-02-25 17:54:59,805][14251] Avg episode reward: 24.065, avg true_objective: 10.765 +[2023-02-25 17:56:00,948][14251] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-25 17:59:00,572][14251] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-25 17:59:00,574][14251] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-25 17:59:00,576][14251] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-25 17:59:00,578][14251] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-25 17:59:00,580][14251] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-25 17:59:00,582][14251] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-25 17:59:00,588][14251] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-25 17:59:00,589][14251] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-25 17:59:00,590][14251] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-25 17:59:00,591][14251] Adding new argument 'hf_repository'='Hatman/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-25 17:59:00,592][14251] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-25 17:59:00,593][14251] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-25 17:59:00,594][14251] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-25 17:59:00,595][14251] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-25 17:59:00,600][14251] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-25 17:59:00,628][14251] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 17:59:00,631][14251] RunningMeanStd input shape: (1,) +[2023-02-25 17:59:00,644][14251] ConvEncoder: input_channels=3 +[2023-02-25 17:59:00,682][14251] Conv encoder output size: 512 +[2023-02-25 17:59:00,684][14251] Policy head output size: 512 +[2023-02-25 17:59:00,702][14251] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 17:59:01,149][14251] Num frames 100... +[2023-02-25 17:59:01,263][14251] Num frames 200... +[2023-02-25 17:59:01,394][14251] Num frames 300... +[2023-02-25 17:59:01,522][14251] Num frames 400... +[2023-02-25 17:59:01,636][14251] Num frames 500... +[2023-02-25 17:59:01,761][14251] Num frames 600... +[2023-02-25 17:59:01,877][14251] Num frames 700... +[2023-02-25 17:59:02,001][14251] Num frames 800... +[2023-02-25 17:59:02,136][14251] Num frames 900... +[2023-02-25 17:59:02,263][14251] Num frames 1000... +[2023-02-25 17:59:02,372][14251] Num frames 1100... +[2023-02-25 17:59:02,495][14251] Avg episode rewards: #0: 28.520, true rewards: #0: 11.520 +[2023-02-25 17:59:02,497][14251] Avg episode reward: 28.520, avg true_objective: 11.520 +[2023-02-25 17:59:02,558][14251] Num frames 1200... +[2023-02-25 17:59:02,680][14251] Num frames 1300... +[2023-02-25 17:59:02,800][14251] Num frames 1400... +[2023-02-25 17:59:02,917][14251] Num frames 1500... +[2023-02-25 17:59:03,030][14251] Num frames 1600... +[2023-02-25 17:59:03,145][14251] Num frames 1700... +[2023-02-25 17:59:03,259][14251] Num frames 1800... +[2023-02-25 17:59:03,377][14251] Num frames 1900... +[2023-02-25 17:59:03,503][14251] Num frames 2000... +[2023-02-25 17:59:03,613][14251] Num frames 2100... +[2023-02-25 17:59:03,761][14251] Avg episode rewards: #0: 24.380, true rewards: #0: 10.880 +[2023-02-25 17:59:03,764][14251] Avg episode reward: 24.380, avg true_objective: 10.880 +[2023-02-25 17:59:03,801][14251] Num frames 2200... +[2023-02-25 17:59:03,933][14251] Num frames 2300... +[2023-02-25 17:59:04,050][14251] Num frames 2400... +[2023-02-25 17:59:04,166][14251] Num frames 2500... +[2023-02-25 17:59:04,290][14251] Num frames 2600... +[2023-02-25 17:59:04,410][14251] Num frames 2700... +[2023-02-25 17:59:04,528][14251] Num frames 2800... +[2023-02-25 17:59:04,642][14251] Num frames 2900... +[2023-02-25 17:59:04,791][14251] Avg episode rewards: #0: 22.587, true rewards: #0: 9.920 +[2023-02-25 17:59:04,793][14251] Avg episode reward: 22.587, avg true_objective: 9.920 +[2023-02-25 17:59:04,838][14251] Num frames 3000... +[2023-02-25 17:59:04,959][14251] Num frames 3100... +[2023-02-25 17:59:05,085][14251] Num frames 3200... +[2023-02-25 17:59:05,205][14251] Num frames 3300... +[2023-02-25 17:59:05,319][14251] Num frames 3400... +[2023-02-25 17:59:05,438][14251] Num frames 3500... +[2023-02-25 17:59:05,564][14251] Num frames 3600... +[2023-02-25 17:59:05,682][14251] Num frames 3700... +[2023-02-25 17:59:05,801][14251] Num frames 3800... +[2023-02-25 17:59:05,919][14251] Num frames 3900... +[2023-02-25 17:59:06,037][14251] Num frames 4000... +[2023-02-25 17:59:06,157][14251] Num frames 4100... +[2023-02-25 17:59:06,280][14251] Num frames 4200... +[2023-02-25 17:59:06,398][14251] Num frames 4300... +[2023-02-25 17:59:06,525][14251] Num frames 4400... +[2023-02-25 17:59:06,643][14251] Num frames 4500... +[2023-02-25 17:59:06,756][14251] Num frames 4600... +[2023-02-25 17:59:06,879][14251] Num frames 4700... +[2023-02-25 17:59:06,994][14251] Num frames 4800... +[2023-02-25 17:59:07,110][14251] Num frames 4900... +[2023-02-25 17:59:07,232][14251] Num frames 5000... +[2023-02-25 17:59:07,379][14251] Avg episode rewards: #0: 31.190, true rewards: #0: 12.690 +[2023-02-25 17:59:07,381][14251] Avg episode reward: 31.190, avg true_objective: 12.690 +[2023-02-25 17:59:07,418][14251] Num frames 5100... +[2023-02-25 17:59:07,534][14251] Num frames 5200... +[2023-02-25 17:59:07,657][14251] Num frames 5300... +[2023-02-25 17:59:07,765][14251] Num frames 5400... +[2023-02-25 17:59:07,887][14251] Num frames 5500... +[2023-02-25 17:59:08,003][14251] Num frames 5600... +[2023-02-25 17:59:08,119][14251] Num frames 5700... +[2023-02-25 17:59:08,239][14251] Num frames 5800... +[2023-02-25 17:59:08,346][14251] Avg episode rewards: #0: 28.088, true rewards: #0: 11.688 +[2023-02-25 17:59:08,348][14251] Avg episode reward: 28.088, avg true_objective: 11.688 +[2023-02-25 17:59:08,422][14251] Num frames 5900... +[2023-02-25 17:59:08,538][14251] Num frames 6000... +[2023-02-25 17:59:08,661][14251] Num frames 6100... +[2023-02-25 17:59:08,781][14251] Num frames 6200... +[2023-02-25 17:59:08,899][14251] Num frames 6300... +[2023-02-25 17:59:09,066][14251] Num frames 6400... +[2023-02-25 17:59:09,222][14251] Num frames 6500... +[2023-02-25 17:59:09,382][14251] Num frames 6600... +[2023-02-25 17:59:09,554][14251] Num frames 6700... +[2023-02-25 17:59:09,715][14251] Num frames 6800... +[2023-02-25 17:59:09,877][14251] Num frames 6900... +[2023-02-25 17:59:10,041][14251] Num frames 7000... +[2023-02-25 17:59:10,207][14251] Num frames 7100... +[2023-02-25 17:59:10,372][14251] Num frames 7200... +[2023-02-25 17:59:10,535][14251] Num frames 7300... +[2023-02-25 17:59:10,711][14251] Num frames 7400... +[2023-02-25 17:59:10,871][14251] Num frames 7500... +[2023-02-25 17:59:11,027][14251] Num frames 7600... +[2023-02-25 17:59:11,196][14251] Num frames 7700... +[2023-02-25 17:59:11,364][14251] Num frames 7800... +[2023-02-25 17:59:11,528][14251] Num frames 7900... +[2023-02-25 17:59:11,665][14251] Avg episode rewards: #0: 33.073, true rewards: #0: 13.240 +[2023-02-25 17:59:11,668][14251] Avg episode reward: 33.073, avg true_objective: 13.240 +[2023-02-25 17:59:11,773][14251] Num frames 8000... +[2023-02-25 17:59:11,938][14251] Num frames 8100... +[2023-02-25 17:59:12,098][14251] Num frames 8200... +[2023-02-25 17:59:12,255][14251] Num frames 8300... +[2023-02-25 17:59:12,412][14251] Num frames 8400... +[2023-02-25 17:59:12,565][14251] Num frames 8500... +[2023-02-25 17:59:12,696][14251] Num frames 8600... +[2023-02-25 17:59:12,819][14251] Num frames 8700... +[2023-02-25 17:59:12,938][14251] Num frames 8800... +[2023-02-25 17:59:13,066][14251] Num frames 8900... +[2023-02-25 17:59:13,176][14251] Num frames 9000... +[2023-02-25 17:59:13,286][14251] Num frames 9100... +[2023-02-25 17:59:13,400][14251] Num frames 9200... +[2023-02-25 17:59:13,516][14251] Num frames 9300... +[2023-02-25 17:59:13,632][14251] Num frames 9400... +[2023-02-25 17:59:13,752][14251] Num frames 9500... +[2023-02-25 17:59:13,875][14251] Num frames 9600... +[2023-02-25 17:59:13,993][14251] Num frames 9700... +[2023-02-25 17:59:14,109][14251] Num frames 9800... +[2023-02-25 17:59:14,240][14251] Num frames 9900... +[2023-02-25 17:59:14,363][14251] Num frames 10000... +[2023-02-25 17:59:14,470][14251] Avg episode rewards: #0: 37.205, true rewards: #0: 14.349 +[2023-02-25 17:59:14,472][14251] Avg episode reward: 37.205, avg true_objective: 14.349 +[2023-02-25 17:59:14,553][14251] Num frames 10100... +[2023-02-25 17:59:14,666][14251] Num frames 10200... +[2023-02-25 17:59:14,793][14251] Num frames 10300... +[2023-02-25 17:59:14,913][14251] Num frames 10400... +[2023-02-25 17:59:15,032][14251] Num frames 10500... +[2023-02-25 17:59:15,158][14251] Num frames 10600... +[2023-02-25 17:59:15,282][14251] Num frames 10700... +[2023-02-25 17:59:15,399][14251] Num frames 10800... +[2023-02-25 17:59:15,508][14251] Avg episode rewards: #0: 34.805, true rewards: #0: 13.555 +[2023-02-25 17:59:15,511][14251] Avg episode reward: 34.805, avg true_objective: 13.555 +[2023-02-25 17:59:15,578][14251] Num frames 10900... +[2023-02-25 17:59:15,702][14251] Num frames 11000... +[2023-02-25 17:59:15,826][14251] Num frames 11100... +[2023-02-25 17:59:15,936][14251] Num frames 11200... +[2023-02-25 17:59:16,050][14251] Num frames 11300... +[2023-02-25 17:59:16,171][14251] Num frames 11400... +[2023-02-25 17:59:16,290][14251] Num frames 11500... +[2023-02-25 17:59:16,426][14251] Num frames 11600... +[2023-02-25 17:59:16,543][14251] Num frames 11700... +[2023-02-25 17:59:16,668][14251] Num frames 11800... +[2023-02-25 17:59:16,792][14251] Num frames 11900... +[2023-02-25 17:59:16,913][14251] Num frames 12000... +[2023-02-25 17:59:17,031][14251] Num frames 12100... +[2023-02-25 17:59:17,146][14251] Num frames 12200... +[2023-02-25 17:59:17,260][14251] Num frames 12300... +[2023-02-25 17:59:17,378][14251] Num frames 12400... +[2023-02-25 17:59:17,448][14251] Avg episode rewards: #0: 34.791, true rewards: #0: 13.791 +[2023-02-25 17:59:17,456][14251] Avg episode reward: 34.791, avg true_objective: 13.791 +[2023-02-25 17:59:17,558][14251] Num frames 12500... +[2023-02-25 17:59:17,682][14251] Num frames 12600... +[2023-02-25 17:59:17,813][14251] Num frames 12700... +[2023-02-25 17:59:17,930][14251] Num frames 12800... +[2023-02-25 17:59:18,093][14251] Avg episode rewards: #0: 31.992, true rewards: #0: 12.892 +[2023-02-25 17:59:18,094][14251] Avg episode reward: 31.992, avg true_objective: 12.892 +[2023-02-25 18:00:34,064][14251] Replay video saved to /content/train_dir/default_experiment/replay.mp4!