diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1100 @@ +[2024-12-16 17:34:00,061][00620] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-12-16 17:34:00,063][00620] Rollout worker 0 uses device cpu +[2024-12-16 17:34:00,065][00620] Rollout worker 1 uses device cpu +[2024-12-16 17:34:00,067][00620] Rollout worker 2 uses device cpu +[2024-12-16 17:34:00,068][00620] Rollout worker 3 uses device cpu +[2024-12-16 17:34:00,070][00620] Rollout worker 4 uses device cpu +[2024-12-16 17:34:00,071][00620] Rollout worker 5 uses device cpu +[2024-12-16 17:34:00,072][00620] Rollout worker 6 uses device cpu +[2024-12-16 17:34:00,073][00620] Rollout worker 7 uses device cpu +[2024-12-16 17:34:00,235][00620] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-16 17:34:00,236][00620] InferenceWorker_p0-w0: min num requests: 2 +[2024-12-16 17:34:00,274][00620] Starting all processes... +[2024-12-16 17:34:00,276][00620] Starting process learner_proc0 +[2024-12-16 17:34:00,322][00620] Starting all processes... +[2024-12-16 17:34:00,332][00620] Starting process inference_proc0-0 +[2024-12-16 17:34:00,332][00620] Starting process rollout_proc0 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc1 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc2 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc3 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc4 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc5 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc6 +[2024-12-16 17:34:00,335][00620] Starting process rollout_proc7 +[2024-12-16 17:34:18,568][02760] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-16 17:34:18,569][02760] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-12-16 17:34:18,639][02760] Num visible devices: 1 +[2024-12-16 17:34:18,685][02760] Starting seed is not provided +[2024-12-16 17:34:18,686][02760] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-16 17:34:18,687][02760] Initializing actor-critic model on device cuda:0 +[2024-12-16 17:34:18,688][02760] RunningMeanStd input shape: (3, 72, 128) +[2024-12-16 17:34:18,691][02760] RunningMeanStd input shape: (1,) +[2024-12-16 17:34:18,797][02760] ConvEncoder: input_channels=3 +[2024-12-16 17:34:18,848][02775] Worker 1 uses CPU cores [1] +[2024-12-16 17:34:18,873][02778] Worker 4 uses CPU cores [0] +[2024-12-16 17:34:18,922][02774] Worker 0 uses CPU cores [0] +[2024-12-16 17:34:18,975][02779] Worker 6 uses CPU cores [0] +[2024-12-16 17:34:19,005][02776] Worker 2 uses CPU cores [0] +[2024-12-16 17:34:19,068][02781] Worker 5 uses CPU cores [1] +[2024-12-16 17:34:19,099][02777] Worker 3 uses CPU cores [1] +[2024-12-16 17:34:19,107][02780] Worker 7 uses CPU cores [1] +[2024-12-16 17:34:19,126][02773] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-16 17:34:19,126][02773] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-12-16 17:34:19,144][02773] Num visible devices: 1 +[2024-12-16 17:34:19,211][02760] Conv encoder output size: 512 +[2024-12-16 17:34:19,211][02760] Policy head output size: 512 +[2024-12-16 17:34:19,263][02760] Created Actor Critic model with architecture: +[2024-12-16 17:34:19,264][02760] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + 
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2024-12-16 17:34:19,684][02760] Using optimizer
+[2024-12-16 17:34:20,232][00620] Heartbeat connected on Batcher_0
+[2024-12-16 17:34:20,235][00620] Heartbeat connected on InferenceWorker_p0-w0
+[2024-12-16 17:34:20,245][00620] Heartbeat connected on RolloutWorker_w0
+[2024-12-16 17:34:20,251][00620] Heartbeat connected on RolloutWorker_w1
+[2024-12-16 17:34:20,257][00620] Heartbeat connected on RolloutWorker_w2
+[2024-12-16 17:34:20,263][00620] Heartbeat connected on RolloutWorker_w4
+[2024-12-16 17:34:20,265][00620] Heartbeat connected on RolloutWorker_w3
+[2024-12-16 17:34:20,267][00620] Heartbeat connected on RolloutWorker_w5
+[2024-12-16 17:34:20,272][00620] Heartbeat connected on RolloutWorker_w6
+[2024-12-16 17:34:20,274][00620] Heartbeat connected on RolloutWorker_w7
+[2024-12-16 17:34:23,486][02760] No checkpoints found
+[2024-12-16 17:34:23,486][02760] Did not load from checkpoint, starting from scratch!
+[2024-12-16 17:34:23,486][02760] Initialized policy 0 weights for model version 0
+[2024-12-16 17:34:23,490][02760] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-16 17:34:23,497][02760] LearnerWorker_p0 finished initialization!
+[2024-12-16 17:34:23,497][00620] Heartbeat connected on LearnerWorker_p0
+[2024-12-16 17:34:23,689][02773] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-16 17:34:23,690][02773] RunningMeanStd input shape: (1,)
+[2024-12-16 17:34:23,704][02773] ConvEncoder: input_channels=3
+[2024-12-16 17:34:23,810][02773] Conv encoder output size: 512
+[2024-12-16 17:34:23,811][02773] Policy head output size: 512
+[2024-12-16 17:34:23,866][00620] Inference worker 0-0 is ready!
+[2024-12-16 17:34:23,867][00620] All inference workers are ready! Signal rollout workers to start!
+[2024-12-16 17:34:24,069][02779] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,067][02776] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,071][02778] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,074][02774] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,080][02781] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,078][02780] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,079][02775] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,082][02777] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:34:24,734][02777] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:25,150][02780] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:25,593][02780] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:25,835][02778] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:25,852][02774] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:25,855][02776] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:25,855][02779] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:26,326][00620] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-16 17:34:26,561][02780] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:26,960][02775] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:26,959][02781] Decorrelating experience for 0 frames...
+[2024-12-16 17:34:26,979][02776] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:26,982][02779] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:26,986][02774] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:27,967][02778] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:28,074][02775] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:28,179][02776] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:28,177][02780] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:28,191][02777] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:30,046][02774] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:30,100][02778] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:30,367][02781] Decorrelating experience for 32 frames...
+[2024-12-16 17:34:30,811][02775] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:31,068][02777] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:31,326][00620] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 2.4. Samples: 12. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-16 17:34:32,644][02776] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:32,798][02779] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:32,857][02778] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:33,464][02774] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:33,659][02775] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:34,023][02777] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:35,512][02779] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:36,326][00620] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 172.4. Samples: 1724. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-16 17:34:36,331][00620] Avg episode reward: [(0, '2.463')]
+[2024-12-16 17:34:36,951][02760] Signal inference workers to stop experience collection...
+[2024-12-16 17:34:36,972][02773] InferenceWorker_p0-w0: stopping experience collection
+[2024-12-16 17:34:37,021][02781] Decorrelating experience for 64 frames...
+[2024-12-16 17:34:37,371][02781] Decorrelating experience for 96 frames...
+[2024-12-16 17:34:40,330][02760] Signal inference workers to resume experience collection...
+[2024-12-16 17:34:40,332][02773] InferenceWorker_p0-w0: resuming experience collection
+[2024-12-16 17:34:41,326][00620] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 8192. Throughput: 0: 166.1. Samples: 2492. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2024-12-16 17:34:41,328][00620] Avg episode reward: [(0, '2.752')]
+[2024-12-16 17:34:46,326][00620] Fps is (10 sec: 2867.2, 60 sec: 1433.6, 300 sec: 1433.6). Total num frames: 28672. Throughput: 0: 362.1. Samples: 7242. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:34:46,332][00620] Avg episode reward: [(0, '3.718')]
+[2024-12-16 17:34:49,904][02773] Updated weights for policy 0, policy_version 10 (0.0173)
+[2024-12-16 17:34:51,326][00620] Fps is (10 sec: 3276.7, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 40960. Throughput: 0: 456.6. Samples: 11416. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:34:51,329][00620] Avg episode reward: [(0, '4.173')]
+[2024-12-16 17:34:56,326][00620] Fps is (10 sec: 2867.2, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 439.9. Samples: 13198. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:34:56,329][00620] Avg episode reward: [(0, '4.425')]
+[2024-12-16 17:35:01,003][02773] Updated weights for policy 0, policy_version 20 (0.0038)
+[2024-12-16 17:35:01,326][00620] Fps is (10 sec: 4096.1, 60 sec: 2340.6, 300 sec: 2340.6). Total num frames: 81920. Throughput: 0: 563.8. Samples: 19732. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:35:01,328][00620] Avg episode reward: [(0, '4.497')]
+[2024-12-16 17:35:06,326][00620] Fps is (10 sec: 4096.0, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 98304. Throughput: 0: 633.3. Samples: 25334. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:35:06,329][00620] Avg episode reward: [(0, '4.496')]
+[2024-12-16 17:35:11,328][00620] Fps is (10 sec: 3276.0, 60 sec: 2548.5, 300 sec: 2548.5). Total num frames: 114688. Throughput: 0: 606.3. Samples: 27286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:35:11,331][00620] Avg episode reward: [(0, '4.410')]
+[2024-12-16 17:35:11,337][02760] Saving new best policy, reward=4.410!
+[2024-12-16 17:35:13,204][02773] Updated weights for policy 0, policy_version 30 (0.0033)
+[2024-12-16 17:35:16,326][00620] Fps is (10 sec: 3276.8, 60 sec: 2621.4, 300 sec: 2621.4). Total num frames: 131072. Throughput: 0: 726.1. Samples: 32686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:35:16,333][00620] Avg episode reward: [(0, '4.477')]
+[2024-12-16 17:35:16,337][02760] Saving new best policy, reward=4.477!
+[2024-12-16 17:35:21,329][00620] Fps is (10 sec: 4095.8, 60 sec: 2829.8, 300 sec: 2829.8). Total num frames: 155648. Throughput: 0: 831.1. Samples: 39128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:35:21,334][00620] Avg episode reward: [(0, '4.604')]
+[2024-12-16 17:35:21,343][02760] Saving new best policy, reward=4.604!
+[2024-12-16 17:35:24,468][02773] Updated weights for policy 0, policy_version 40 (0.0017)
+[2024-12-16 17:35:26,326][00620] Fps is (10 sec: 3686.4, 60 sec: 2798.9, 300 sec: 2798.9). Total num frames: 167936. Throughput: 0: 856.4. Samples: 41030. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:35:26,330][00620] Avg episode reward: [(0, '4.546')]
+[2024-12-16 17:35:31,326][00620] Fps is (10 sec: 2868.0, 60 sec: 3072.0, 300 sec: 2835.7). Total num frames: 184320. Throughput: 0: 858.1. Samples: 45856. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:35:31,328][00620] Avg episode reward: [(0, '4.384')]
+[2024-12-16 17:35:35,094][02773] Updated weights for policy 0, policy_version 50 (0.0017)
+[2024-12-16 17:35:36,326][00620] Fps is (10 sec: 4096.1, 60 sec: 3481.6, 300 sec: 2984.2). Total num frames: 208896. Throughput: 0: 916.2. Samples: 52644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:35:36,333][00620] Avg episode reward: [(0, '4.228')]
+[2024-12-16 17:35:41,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3003.7). Total num frames: 225280. Throughput: 0: 944.2. Samples: 55686. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:35:41,332][00620] Avg episode reward: [(0, '4.176')]
+[2024-12-16 17:35:46,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3020.8). Total num frames: 241664. Throughput: 0: 889.2. Samples: 59746. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:35:46,328][00620] Avg episode reward: [(0, '4.233')]
+[2024-12-16 17:35:46,888][02773] Updated weights for policy 0, policy_version 60 (0.0034)
+[2024-12-16 17:35:51,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3084.0). Total num frames: 262144. Throughput: 0: 912.5. Samples: 66398. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:35:51,330][00620] Avg episode reward: [(0, '4.299')]
+[2024-12-16 17:35:51,336][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000064_262144.pth...
+[2024-12-16 17:35:56,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3140.3). Total num frames: 282624. Throughput: 0: 943.3. Samples: 69732. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:35:56,331][00620] Avg episode reward: [(0, '4.174')]
+[2024-12-16 17:35:56,893][02773] Updated weights for policy 0, policy_version 70 (0.0013)
+[2024-12-16 17:36:01,329][00620] Fps is (10 sec: 3275.8, 60 sec: 3549.7, 300 sec: 3104.2). Total num frames: 294912. Throughput: 0: 923.6. Samples: 74252. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:36:01,332][00620] Avg episode reward: [(0, '4.140')]
+[2024-12-16 17:36:06,330][00620] Fps is (10 sec: 3275.5, 60 sec: 3617.9, 300 sec: 3153.8). Total num frames: 315392. Throughput: 0: 908.2. Samples: 79998. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:36:06,332][00620] Avg episode reward: [(0, '4.376')]
+[2024-12-16 17:36:08,132][02773] Updated weights for policy 0, policy_version 80 (0.0032)
+[2024-12-16 17:36:11,326][00620] Fps is (10 sec: 4507.0, 60 sec: 3754.8, 300 sec: 3237.8). Total num frames: 339968. Throughput: 0: 941.3. Samples: 83388. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:36:11,333][00620] Avg episode reward: [(0, '4.452')]
+[2024-12-16 17:36:16,326][00620] Fps is (10 sec: 4097.6, 60 sec: 3754.7, 300 sec: 3239.6). Total num frames: 356352. Throughput: 0: 961.1. Samples: 89106. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:36:16,329][00620] Avg episode reward: [(0, '4.261')]
+[2024-12-16 17:36:19,835][02773] Updated weights for policy 0, policy_version 90 (0.0022)
+[2024-12-16 17:36:21,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3241.2). Total num frames: 372736. Throughput: 0: 916.4. Samples: 93882. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:36:21,328][00620] Avg episode reward: [(0, '4.416')]
+[2024-12-16 17:36:26,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3310.9). Total num frames: 397312. Throughput: 0: 925.3. Samples: 97326. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:36:26,328][00620] Avg episode reward: [(0, '4.602')]
+[2024-12-16 17:36:28,920][02773] Updated weights for policy 0, policy_version 100 (0.0017)
+[2024-12-16 17:36:31,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3309.6). Total num frames: 413696. Throughput: 0: 980.3. Samples: 103860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:36:31,329][00620] Avg episode reward: [(0, '4.515')]
+[2024-12-16 17:36:36,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3308.3). Total num frames: 430080. Throughput: 0: 924.7. Samples: 108010. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:36:36,331][00620] Avg episode reward: [(0, '4.610')]
+[2024-12-16 17:36:36,335][02760] Saving new best policy, reward=4.610!
+[2024-12-16 17:36:40,682][02773] Updated weights for policy 0, policy_version 110 (0.0021)
+[2024-12-16 17:36:41,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3337.5). Total num frames: 450560. Throughput: 0: 919.2. Samples: 111098. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:36:41,332][00620] Avg episode reward: [(0, '4.637')]
+[2024-12-16 17:36:41,341][02760] Saving new best policy, reward=4.637!
+[2024-12-16 17:36:46,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3393.8). Total num frames: 475136. Throughput: 0: 969.7. Samples: 117884. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:36:46,330][00620] Avg episode reward: [(0, '4.585')]
+[2024-12-16 17:36:51,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3361.5). Total num frames: 487424. Throughput: 0: 947.7. Samples: 122642. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-12-16 17:36:51,330][00620] Avg episode reward: [(0, '4.568')]
+[2024-12-16 17:36:52,279][02773] Updated weights for policy 0, policy_version 120 (0.0034)
+[2024-12-16 17:36:56,326][00620] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3358.7). Total num frames: 503808. Throughput: 0: 922.5. Samples: 124900. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:36:56,328][00620] Avg episode reward: [(0, '4.589')]
+[2024-12-16 17:37:01,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3891.4, 300 sec: 3408.9). Total num frames: 528384. Throughput: 0: 942.8. Samples: 131530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:37:01,331][00620] Avg episode reward: [(0, '4.550')]
+[2024-12-16 17:37:01,825][02773] Updated weights for policy 0, policy_version 130 (0.0039)
+[2024-12-16 17:37:06,328][00620] Fps is (10 sec: 4095.2, 60 sec: 3823.1, 300 sec: 3404.8). Total num frames: 544768. Throughput: 0: 961.8. Samples: 137164. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:37:06,333][00620] Avg episode reward: [(0, '4.638')]
+[2024-12-16 17:37:11,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3400.9). Total num frames: 561152. Throughput: 0: 929.4. Samples: 139150. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:37:11,330][00620] Avg episode reward: [(0, '4.708')]
+[2024-12-16 17:37:11,339][02760] Saving new best policy, reward=4.708!
+[2024-12-16 17:37:13,721][02773] Updated weights for policy 0, policy_version 140 (0.0017)
+[2024-12-16 17:37:16,326][00620] Fps is (10 sec: 3687.1, 60 sec: 3754.7, 300 sec: 3421.4). Total num frames: 581632. Throughput: 0: 919.1. Samples: 145220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:37:16,331][00620] Avg episode reward: [(0, '4.257')]
+[2024-12-16 17:37:21,330][00620] Fps is (10 sec: 4094.3, 60 sec: 3822.7, 300 sec: 3440.6). Total num frames: 602112. Throughput: 0: 976.8. Samples: 151968. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:37:21,332][00620] Avg episode reward: [(0, '4.253')]
+[2024-12-16 17:37:24,423][02773] Updated weights for policy 0, policy_version 150 (0.0021)
+[2024-12-16 17:37:26,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3436.1). Total num frames: 618496. Throughput: 0: 952.4. Samples: 153954. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:37:26,336][00620] Avg episode reward: [(0, '4.369')]
+[2024-12-16 17:37:31,326][00620] Fps is (10 sec: 3687.9, 60 sec: 3754.7, 300 sec: 3453.9). Total num frames: 638976. Throughput: 0: 916.6. Samples: 159130. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:37:31,333][00620] Avg episode reward: [(0, '4.610')]
+[2024-12-16 17:37:34,535][02773] Updated weights for policy 0, policy_version 160 (0.0028)
+[2024-12-16 17:37:36,326][00620] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3492.4). Total num frames: 663552. Throughput: 0: 962.6. Samples: 165960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:37:36,331][00620] Avg episode reward: [(0, '4.686')]
+[2024-12-16 17:37:41,326][00620] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3486.8). Total num frames: 679936. Throughput: 0: 976.0. Samples: 168822. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:37:41,331][00620] Avg episode reward: [(0, '4.465')]
+[2024-12-16 17:37:46,280][02773] Updated weights for policy 0, policy_version 170 (0.0023)
+[2024-12-16 17:37:46,326][00620] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3481.6). Total num frames: 696320. Throughput: 0: 922.8. Samples: 173056. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:37:46,328][00620] Avg episode reward: [(0, '4.501')]
+[2024-12-16 17:37:51,326][00620] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3496.6). Total num frames: 716800. Throughput: 0: 952.8. Samples: 180038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:37:51,332][00620] Avg episode reward: [(0, '4.568')]
+[2024-12-16 17:37:51,341][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000175_716800.pth...
+[2024-12-16 17:37:55,619][02773] Updated weights for policy 0, policy_version 180 (0.0033)
+[2024-12-16 17:37:56,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3510.9). Total num frames: 737280. Throughput: 0: 983.3. Samples: 183400. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:37:56,328][00620] Avg episode reward: [(0, '4.483')]
+[2024-12-16 17:38:01,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3486.4). Total num frames: 749568. Throughput: 0: 947.6. Samples: 187860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:38:01,330][00620] Avg episode reward: [(0, '4.612')]
+[2024-12-16 17:38:06,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3518.8). Total num frames: 774144. Throughput: 0: 931.7. Samples: 193890. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:38:06,332][00620] Avg episode reward: [(0, '4.744')]
+[2024-12-16 17:38:06,335][02760] Saving new best policy, reward=4.744!
+[2024-12-16 17:38:07,059][02773] Updated weights for policy 0, policy_version 190 (0.0029)
+[2024-12-16 17:38:11,326][00620] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3531.7). Total num frames: 794624. Throughput: 0: 961.4. Samples: 197218. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:38:11,333][00620] Avg episode reward: [(0, '4.747')]
+[2024-12-16 17:38:11,342][02760] Saving new best policy, reward=4.747!
+[2024-12-16 17:38:16,326][00620] Fps is (10 sec: 3686.1, 60 sec: 3822.9, 300 sec: 3526.1). Total num frames: 811008. Throughput: 0: 970.7. Samples: 202814. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:38:16,330][00620] Avg episode reward: [(0, '4.602')]
+[2024-12-16 17:38:18,368][02773] Updated weights for policy 0, policy_version 200 (0.0024)
+[2024-12-16 17:38:21,326][00620] Fps is (10 sec: 3276.9, 60 sec: 3754.9, 300 sec: 3520.8). Total num frames: 827392. Throughput: 0: 930.0. Samples: 207810. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:38:21,328][00620] Avg episode reward: [(0, '4.444')]
+[2024-12-16 17:38:26,326][00620] Fps is (10 sec: 4096.3, 60 sec: 3891.2, 300 sec: 3549.9). Total num frames: 851968. Throughput: 0: 944.0. Samples: 211300. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:38:26,334][00620] Avg episode reward: [(0, '4.531')]
+[2024-12-16 17:38:27,756][02773] Updated weights for policy 0, policy_version 210 (0.0017)
+[2024-12-16 17:38:31,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3544.3). Total num frames: 868352. Throughput: 0: 989.9. Samples: 217602. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:38:31,330][00620] Avg episode reward: [(0, '4.459')]
+[2024-12-16 17:38:36,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3538.9). Total num frames: 884736. Throughput: 0: 926.6. Samples: 221734. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:38:36,330][00620] Avg episode reward: [(0, '4.567')]
+[2024-12-16 17:38:39,314][02773] Updated weights for policy 0, policy_version 220 (0.0018)
+[2024-12-16 17:38:41,326][00620] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3565.9). Total num frames: 909312. Throughput: 0: 929.2. Samples: 225214. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:38:41,331][00620] Avg episode reward: [(0, '4.836')]
+[2024-12-16 17:38:41,340][02760] Saving new best policy, reward=4.836!
+[2024-12-16 17:38:46,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3576.1). Total num frames: 929792. Throughput: 0: 984.7. Samples: 232172. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:38:46,333][00620] Avg episode reward: [(0, '4.549')]
+[2024-12-16 17:38:49,504][02773] Updated weights for policy 0, policy_version 230 (0.0019)
+[2024-12-16 17:38:51,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3570.5). Total num frames: 946176. Throughput: 0: 955.1. Samples: 236870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:38:51,333][00620] Avg episode reward: [(0, '4.586')]
+[2024-12-16 17:38:56,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3580.2). Total num frames: 966656. Throughput: 0: 942.6. Samples: 239636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:38:56,330][00620] Avg episode reward: [(0, '4.460')]
+[2024-12-16 17:38:59,723][02773] Updated weights for policy 0, policy_version 240 (0.0031)
+[2024-12-16 17:39:01,326][00620] Fps is (10 sec: 4095.8, 60 sec: 3959.4, 300 sec: 3589.6). Total num frames: 987136. Throughput: 0: 970.8. Samples: 246500. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:39:01,334][00620] Avg episode reward: [(0, '4.414')]
+[2024-12-16 17:39:06,327][00620] Fps is (10 sec: 3686.1, 60 sec: 3822.9, 300 sec: 3584.0). Total num frames: 1003520. Throughput: 0: 982.0. Samples: 252000. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:39:06,330][00620] Avg episode reward: [(0, '4.749')]
+[2024-12-16 17:39:11,282][02773] Updated weights for policy 0, policy_version 250 (0.0018)
+[2024-12-16 17:39:11,326][00620] Fps is (10 sec: 3686.5, 60 sec: 3823.0, 300 sec: 3593.0). Total num frames: 1024000. Throughput: 0: 952.3. Samples: 254154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:39:11,329][00620] Avg episode reward: [(0, '4.781')]
+[2024-12-16 17:39:16,326][00620] Fps is (10 sec: 4096.4, 60 sec: 3891.2, 300 sec: 3601.7). Total num frames: 1044480. Throughput: 0: 961.8. Samples: 260882. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:39:16,327][00620] Avg episode reward: [(0, '4.575')]
+[2024-12-16 17:39:19,953][02773] Updated weights for policy 0, policy_version 260 (0.0024)
+[2024-12-16 17:39:21,330][00620] Fps is (10 sec: 4503.7, 60 sec: 4027.5, 300 sec: 3623.9). Total num frames: 1069056. Throughput: 0: 1015.6. Samples: 267438. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:39:21,336][00620] Avg episode reward: [(0, '4.531')]
+[2024-12-16 17:39:26,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 1081344. Throughput: 0: 983.7. Samples: 269480. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:39:26,328][00620] Avg episode reward: [(0, '4.619')]
+[2024-12-16 17:39:31,326][00620] Fps is (10 sec: 3278.2, 60 sec: 3891.2, 300 sec: 3735.0). Total num frames: 1101824. Throughput: 0: 954.2. Samples: 275112. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:39:31,328][00620] Avg episode reward: [(0, '4.837')]
+[2024-12-16 17:39:31,386][02760] Saving new best policy, reward=4.837!
+[2024-12-16 17:39:31,395][02773] Updated weights for policy 0, policy_version 270 (0.0019)
+[2024-12-16 17:39:36,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3790.5). Total num frames: 1126400. Throughput: 0: 1005.3. Samples: 282108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:39:36,329][00620] Avg episode reward: [(0, '4.678')]
+[2024-12-16 17:39:41,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1138688. Throughput: 0: 1000.2. Samples: 284646. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:39:41,329][00620] Avg episode reward: [(0, '4.530')]
+[2024-12-16 17:39:42,797][02773] Updated weights for policy 0, policy_version 280 (0.0016)
+[2024-12-16 17:39:46,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1159168. Throughput: 0: 953.7. Samples: 289418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:39:46,333][00620] Avg episode reward: [(0, '4.674')]
+[2024-12-16 17:39:51,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3818.3). Total num frames: 1183744. Throughput: 0: 987.8. Samples: 296448. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:39:51,332][00620] Avg episode reward: [(0, '4.705')]
+[2024-12-16 17:39:51,341][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000289_1183744.pth...
+[2024-12-16 17:39:51,462][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000064_262144.pth
+[2024-12-16 17:39:51,875][02773] Updated weights for policy 0, policy_version 290 (0.0015)
+[2024-12-16 17:39:56,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 1200128. Throughput: 0: 1013.1. Samples: 299744. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:39:56,331][00620] Avg episode reward: [(0, '4.585')]
+[2024-12-16 17:40:01,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3790.5). Total num frames: 1216512. Throughput: 0: 956.2. Samples: 303912. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:40:01,328][00620] Avg episode reward: [(0, '4.637')]
+[2024-12-16 17:40:03,522][02773] Updated weights for policy 0, policy_version 300 (0.0030)
+[2024-12-16 17:40:06,327][00620] Fps is (10 sec: 4095.6, 60 sec: 3959.5, 300 sec: 3818.3). Total num frames: 1241088. Throughput: 0: 956.4. Samples: 310472. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:40:06,331][00620] Avg episode reward: [(0, '4.794')]
+[2024-12-16 17:40:11,326][00620] Fps is (10 sec: 4505.4, 60 sec: 3959.4, 300 sec: 3832.2). Total num frames: 1261568. Throughput: 0: 987.1. Samples: 313898. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:40:11,328][00620] Avg episode reward: [(0, '4.620')]
+[2024-12-16 17:40:13,589][02773] Updated weights for policy 0, policy_version 310 (0.0014)
+[2024-12-16 17:40:16,326][00620] Fps is (10 sec: 3277.1, 60 sec: 3822.9, 300 sec: 3790.6). Total num frames: 1273856. Throughput: 0: 974.3. Samples: 318954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:40:16,333][00620] Avg episode reward: [(0, '4.754')]
+[2024-12-16 17:40:21,326][00620] Fps is (10 sec: 3276.9, 60 sec: 3754.9, 300 sec: 3818.3). Total num frames: 1294336. Throughput: 0: 944.0. Samples: 324588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:40:21,328][00620] Avg episode reward: [(0, '4.847')]
+[2024-12-16 17:40:21,339][02760] Saving new best policy, reward=4.847!
+[2024-12-16 17:40:24,098][02773] Updated weights for policy 0, policy_version 320 (0.0035)
+[2024-12-16 17:40:26,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 1318912. Throughput: 0: 963.9. Samples: 328020. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:40:26,327][00620] Avg episode reward: [(0, '4.960')]
+[2024-12-16 17:40:26,335][02760] Saving new best policy, reward=4.960!
+[2024-12-16 17:40:31,327][00620] Fps is (10 sec: 4095.3, 60 sec: 3891.1, 300 sec: 3818.3). Total num frames: 1335296. Throughput: 0: 986.5. Samples: 333814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:40:31,330][00620] Avg episode reward: [(0, '4.914')]
+[2024-12-16 17:40:35,859][02773] Updated weights for policy 0, policy_version 330 (0.0014)
+[2024-12-16 17:40:36,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 1351680. Throughput: 0: 935.9. Samples: 338564. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:40:36,327][00620] Avg episode reward: [(0, '4.754')]
+[2024-12-16 17:40:41,326][00620] Fps is (10 sec: 4096.7, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 1376256. Throughput: 0: 941.6. Samples: 342114. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:40:41,331][00620] Avg episode reward: [(0, '4.708')]
+[2024-12-16 17:40:44,674][02773] Updated weights for policy 0, policy_version 340 (0.0018)
+[2024-12-16 17:40:46,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 1396736. Throughput: 0: 1004.3. Samples: 349104. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:40:46,330][00620] Avg episode reward: [(0, '4.777')]
+[2024-12-16 17:40:51,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 1409024. Throughput: 0: 952.5. Samples: 353332. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-12-16 17:40:51,329][00620] Avg episode reward: [(0, '4.809')]
+[2024-12-16 17:40:56,006][02773] Updated weights for policy 0, policy_version 350 (0.0013)
+[2024-12-16 17:40:56,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 1433600. Throughput: 0: 945.8. Samples: 356460. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:40:56,328][00620] Avg episode reward: [(0, '4.903')]
+[2024-12-16 17:41:01,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 1454080. Throughput: 0: 986.8. Samples: 363360. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:41:01,332][00620] Avg episode reward: [(0, '5.063')]
+[2024-12-16 17:41:01,397][02760] Saving new best policy, reward=5.063!
+[2024-12-16 17:41:06,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 1470464. Throughput: 0: 972.4. Samples: 368348. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:41:06,328][00620] Avg episode reward: [(0, '5.103')]
+[2024-12-16 17:41:06,332][02760] Saving new best policy, reward=5.103!
+[2024-12-16 17:41:07,065][02773] Updated weights for policy 0, policy_version 360 (0.0013)
+[2024-12-16 17:41:11,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3846.1). Total num frames: 1490944. Throughput: 0: 945.9. Samples: 370584. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:41:11,333][00620] Avg episode reward: [(0, '5.160')]
+[2024-12-16 17:41:11,340][02760] Saving new best policy, reward=5.160!
+[2024-12-16 17:41:16,275][02773] Updated weights for policy 0, policy_version 370 (0.0025)
+[2024-12-16 17:41:16,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 1515520. Throughput: 0: 975.1. Samples: 377694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:41:16,328][00620] Avg episode reward: [(0, '5.194')]
+[2024-12-16 17:41:16,335][02760] Saving new best policy, reward=5.194!
+[2024-12-16 17:41:21,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 1531904. Throughput: 0: 1001.6. Samples: 383634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:41:21,331][00620] Avg episode reward: [(0, '5.206')]
+[2024-12-16 17:41:21,344][02760] Saving new best policy, reward=5.206!
+[2024-12-16 17:41:26,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 1548288. Throughput: 0: 967.8. Samples: 385664. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:41:26,332][00620] Avg episode reward: [(0, '5.030')]
+[2024-12-16 17:41:27,995][02773] Updated weights for policy 0, policy_version 380 (0.0015)
+[2024-12-16 17:41:31,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3873.8). Total num frames: 1572864. Throughput: 0: 953.6. Samples: 392018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:41:31,330][00620] Avg episode reward: [(0, '5.268')]
+[2024-12-16 17:41:31,338][02760] Saving new best policy, reward=5.268!
+[2024-12-16 17:41:36,326][00620] Fps is (10 sec: 4505.4, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 1593344. Throughput: 0: 1017.6. Samples: 399124. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:41:36,328][00620] Avg episode reward: [(0, '5.148')]
+[2024-12-16 17:41:36,782][02773] Updated weights for policy 0, policy_version 390 (0.0036)
+[2024-12-16 17:41:41,329][00620] Fps is (10 sec: 3685.3, 60 sec: 3891.0, 300 sec: 3846.0). Total num frames: 1609728. Throughput: 0: 998.0. Samples: 401374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:41:41,331][00620] Avg episode reward: [(0, '5.311')]
+[2024-12-16 17:41:41,349][02760] Saving new best policy, reward=5.311!
+[2024-12-16 17:41:46,326][00620] Fps is (10 sec: 3686.5, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1630208. Throughput: 0: 966.2. Samples: 406838. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:41:46,333][00620] Avg episode reward: [(0, '5.303')]
+[2024-12-16 17:41:47,673][02773] Updated weights for policy 0, policy_version 400 (0.0021)
+[2024-12-16 17:41:51,326][00620] Fps is (10 sec: 4507.0, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 1654784. Throughput: 0: 1019.3. Samples: 414216. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:41:51,329][00620] Avg episode reward: [(0, '5.408')]
+[2024-12-16 17:41:51,338][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000404_1654784.pth...
+[2024-12-16 17:41:51,459][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000175_716800.pth
+[2024-12-16 17:41:51,482][02760] Saving new best policy, reward=5.408!
+[2024-12-16 17:41:56,326][00620] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 1671168. Throughput: 0: 1032.3. Samples: 417036. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:41:56,330][00620] Avg episode reward: [(0, '5.664')]
+[2024-12-16 17:41:56,335][02760] Saving new best policy, reward=5.664!
+[2024-12-16 17:41:58,720][02773] Updated weights for policy 0, policy_version 410 (0.0049)
+[2024-12-16 17:42:01,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 1687552. Throughput: 0: 973.9. Samples: 421518. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:42:01,328][00620] Avg episode reward: [(0, '5.695')]
+[2024-12-16 17:42:01,337][02760] Saving new best policy, reward=5.695!
+[2024-12-16 17:42:06,326][00620] Fps is (10 sec: 4095.9, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 1712128. Throughput: 0: 996.7. Samples: 428486. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:42:06,328][00620] Avg episode reward: [(0, '5.635')]
+[2024-12-16 17:42:07,540][02773] Updated weights for policy 0, policy_version 420 (0.0016)
+[2024-12-16 17:42:11,332][00620] Fps is (10 sec: 4502.8, 60 sec: 4027.3, 300 sec: 3901.5). Total num frames: 1732608. Throughput: 0: 1033.5. Samples: 432180. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:42:11,334][00620] Avg episode reward: [(0, '5.971')]
+[2024-12-16 17:42:11,355][02760] Saving new best policy, reward=5.971!
+[2024-12-16 17:42:16,327][00620] Fps is (10 sec: 3276.5, 60 sec: 3822.9, 300 sec: 3873.9). Total num frames: 1744896. Throughput: 0: 994.3. Samples: 436764. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:42:16,336][00620] Avg episode reward: [(0, '6.131')]
+[2024-12-16 17:42:16,342][02760] Saving new best policy, reward=6.131!
+[2024-12-16 17:42:19,436][02773] Updated weights for policy 0, policy_version 430 (0.0026)
+[2024-12-16 17:42:21,326][00620] Fps is (10 sec: 3688.5, 60 sec: 3959.4, 300 sec: 3901.6). Total num frames: 1769472. Throughput: 0: 970.1. Samples: 442778. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:42:21,331][00620] Avg episode reward: [(0, '6.306')]
+[2024-12-16 17:42:21,338][02760] Saving new best policy, reward=6.306!
+[2024-12-16 17:42:26,326][00620] Fps is (10 sec: 4915.7, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 1794048. Throughput: 0: 995.7. Samples: 446176. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:42:26,333][00620] Avg episode reward: [(0, '6.561')]
+[2024-12-16 17:42:26,336][02760] Saving new best policy, reward=6.561!
+[2024-12-16 17:42:29,238][02773] Updated weights for policy 0, policy_version 440 (0.0015)
+[2024-12-16 17:42:31,328][00620] Fps is (10 sec: 3685.8, 60 sec: 3891.1, 300 sec: 3873.8). Total num frames: 1806336. Throughput: 0: 995.7. Samples: 451644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:42:31,335][00620] Avg episode reward: [(0, '6.684')]
+[2024-12-16 17:42:31,347][02760] Saving new best policy, reward=6.684!
+[2024-12-16 17:42:36,326][00620] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1826816. Throughput: 0: 951.2. Samples: 457018. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:42:36,333][00620] Avg episode reward: [(0, '6.930')]
+[2024-12-16 17:42:36,336][02760] Saving new best policy, reward=6.930!
+[2024-12-16 17:42:39,731][02773] Updated weights for policy 0, policy_version 450 (0.0020)
+[2024-12-16 17:42:41,326][00620] Fps is (10 sec: 4096.8, 60 sec: 3959.7, 300 sec: 3901.6). Total num frames: 1847296. Throughput: 0: 965.4. Samples: 460478. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:42:41,333][00620] Avg episode reward: [(0, '6.522')]
+[2024-12-16 17:42:46,328][00620] Fps is (10 sec: 4095.1, 60 sec: 3959.3, 300 sec: 3901.6). Total num frames: 1867776. Throughput: 0: 1011.3. Samples: 467030. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-12-16 17:42:46,333][00620] Avg episode reward: [(0, '6.532')]
+[2024-12-16 17:42:51,171][02773] Updated weights for policy 0, policy_version 460 (0.0014)
+[2024-12-16 17:42:51,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 1884160. Throughput: 0: 955.3. Samples: 471474. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:42:51,330][00620] Avg episode reward: [(0, '6.644')]
+[2024-12-16 17:42:56,326][00620] Fps is (10 sec: 3687.2, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 1904640. Throughput: 0: 948.1. Samples: 474840. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:42:56,328][00620] Avg episode reward: [(0, '7.070')]
+[2024-12-16 17:42:56,339][02760] Saving new best policy, reward=7.070!
+[2024-12-16 17:43:00,252][02773] Updated weights for policy 0, policy_version 470 (0.0017)
+[2024-12-16 17:43:01,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 1925120. Throughput: 0: 996.8. Samples: 481618. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:43:01,331][00620] Avg episode reward: [(0, '7.480')]
+[2024-12-16 17:43:01,350][02760] Saving new best policy, reward=7.480!
+[2024-12-16 17:43:06,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3887.7). Total num frames: 1941504. Throughput: 0: 963.3. Samples: 486128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:43:06,328][00620] Avg episode reward: [(0, '7.475')]
+[2024-12-16 17:43:11,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3823.3, 300 sec: 3901.6). Total num frames: 1961984. Throughput: 0: 946.4. Samples: 488766. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:43:11,331][00620] Avg episode reward: [(0, '7.854')]
+[2024-12-16 17:43:11,339][02760] Saving new best policy, reward=7.854!
+[2024-12-16 17:43:11,966][02773] Updated weights for policy 0, policy_version 480 (0.0019)
+[2024-12-16 17:43:16,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3929.4). Total num frames: 1986560. Throughput: 0: 978.3. Samples: 495666. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:43:16,332][00620] Avg episode reward: [(0, '7.129')]
+[2024-12-16 17:43:21,326][00620] Fps is (10 sec: 4095.9, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 2002944. Throughput: 0: 986.1. Samples: 501394. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:43:21,331][00620] Avg episode reward: [(0, '6.699')]
+[2024-12-16 17:43:22,456][02773] Updated weights for policy 0, policy_version 490 (0.0022)
+[2024-12-16 17:43:26,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 2019328. Throughput: 0: 958.6. Samples: 503614. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:43:26,332][00620] Avg episode reward: [(0, '6.627')]
+[2024-12-16 17:43:31,326][00620] Fps is (10 sec: 4096.1, 60 sec: 3959.6, 300 sec: 3929.4). Total num frames: 2043904. Throughput: 0: 963.4. Samples: 510382. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:43:31,327][00620] Avg episode reward: [(0, '7.527')]
+[2024-12-16 17:43:31,829][02773] Updated weights for policy 0, policy_version 500 (0.0018)
+[2024-12-16 17:43:36,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 2064384. Throughput: 0: 1014.6. Samples: 517132. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:43:36,329][00620] Avg episode reward: [(0, '7.812')]
+[2024-12-16 17:43:41,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 2080768. Throughput: 0: 987.0. Samples: 519256. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-16 17:43:41,328][00620] Avg episode reward: [(0, '8.767')]
+[2024-12-16 17:43:41,342][02760] Saving new best policy, reward=8.767!
+[2024-12-16 17:43:43,077][02773] Updated weights for policy 0, policy_version 510 (0.0032)
+[2024-12-16 17:43:46,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3929.4). Total num frames: 2105344. Throughput: 0: 967.3. Samples: 525146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:43:46,331][00620] Avg episode reward: [(0, '9.266')]
+[2024-12-16 17:43:46,335][02760] Saving new best policy, reward=9.266!
+[2024-12-16 17:43:51,326][00620] Fps is (10 sec: 4505.5, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 2125824. Throughput: 0: 1022.5. Samples: 532140. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:43:51,336][00620] Avg episode reward: [(0, '9.929')]
+[2024-12-16 17:43:51,349][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000519_2125824.pth...
+[2024-12-16 17:43:51,495][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000289_1183744.pth
+[2024-12-16 17:43:51,514][02760] Saving new best policy, reward=9.929!
+[2024-12-16 17:43:51,847][02773] Updated weights for policy 0, policy_version 520 (0.0019)
+[2024-12-16 17:43:56,330][00620] Fps is (10 sec: 3684.9, 60 sec: 3959.2, 300 sec: 3915.4). Total num frames: 2142208. Throughput: 0: 1014.2. Samples: 534408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:43:56,332][00620] Avg episode reward: [(0, '9.540')]
+[2024-12-16 17:44:01,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 2158592. Throughput: 0: 966.0. Samples: 539136. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-16 17:44:01,334][00620] Avg episode reward: [(0, '8.691')]
+[2024-12-16 17:44:03,358][02773] Updated weights for policy 0, policy_version 530 (0.0021)
+[2024-12-16 17:44:06,326][00620] Fps is (10 sec: 4097.7, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 2183168. Throughput: 0: 996.9. Samples: 546256. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:44:06,330][00620] Avg episode reward: [(0, '9.081')]
+[2024-12-16 17:44:11,330][00620] Fps is (10 sec: 4094.4, 60 sec: 3959.2, 300 sec: 3915.4). Total num frames: 2199552. Throughput: 0: 1021.7. Samples: 549594. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:44:11,334][00620] Avg episode reward: [(0, '8.777')]
+[2024-12-16 17:44:14,346][02773] Updated weights for policy 0, policy_version 540 (0.0022)
+[2024-12-16 17:44:16,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3887.8). Total num frames: 2215936. Throughput: 0: 965.6. Samples: 553836. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:44:16,327][00620] Avg episode reward: [(0, '10.153')]
+[2024-12-16 17:44:16,332][02760] Saving new best policy, reward=10.153!
+[2024-12-16 17:44:21,326][00620] Fps is (10 sec: 4097.7, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 2240512. Throughput: 0: 960.0. Samples: 560334. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:44:21,328][00620] Avg episode reward: [(0, '10.793')]
+[2024-12-16 17:44:21,336][02760] Saving new best policy, reward=10.793!
+[2024-12-16 17:44:23,929][02773] Updated weights for policy 0, policy_version 550 (0.0014)
+[2024-12-16 17:44:26,326][00620] Fps is (10 sec: 4505.7, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 2260992. Throughput: 0: 987.2. Samples: 563680. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:44:26,328][00620] Avg episode reward: [(0, '11.176')]
+[2024-12-16 17:44:26,333][02760] Saving new best policy, reward=11.176!
+[2024-12-16 17:44:31,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 2273280. Throughput: 0: 962.9. Samples: 568476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:44:31,328][00620] Avg episode reward: [(0, '10.250')]
+[2024-12-16 17:44:35,829][02773] Updated weights for policy 0, policy_version 560 (0.0038)
+[2024-12-16 17:44:36,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 2293760. Throughput: 0: 932.4. Samples: 574098. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:44:36,328][00620] Avg episode reward: [(0, '9.650')]
+[2024-12-16 17:44:41,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 2318336. Throughput: 0: 961.8. Samples: 577686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:44:41,328][00620] Avg episode reward: [(0, '11.084')]
+[2024-12-16 17:44:45,285][02773] Updated weights for policy 0, policy_version 570 (0.0027)
+[2024-12-16 17:44:46,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 2334720. Throughput: 0: 994.6. Samples: 583894. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:44:46,334][00620] Avg episode reward: [(0, '11.300')]
+[2024-12-16 17:44:46,337][02760] Saving new best policy, reward=11.300!
+[2024-12-16 17:44:51,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 2351104. Throughput: 0: 942.1. Samples: 588652. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-16 17:44:51,333][00620] Avg episode reward: [(0, '11.609')]
+[2024-12-16 17:44:51,344][02760] Saving new best policy, reward=11.609!
+[2024-12-16 17:44:55,759][02773] Updated weights for policy 0, policy_version 580 (0.0016)
+[2024-12-16 17:44:56,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3891.5, 300 sec: 3929.4). Total num frames: 2375680. Throughput: 0: 946.5. Samples: 592182. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:44:56,333][00620] Avg episode reward: [(0, '11.078')]
+[2024-12-16 17:45:01,326][00620] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 2396160. Throughput: 0: 1011.7. Samples: 599362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:01,333][00620] Avg episode reward: [(0, '11.203')]
+[2024-12-16 17:45:06,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 2412544. Throughput: 0: 962.4. Samples: 603644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:45:06,329][00620] Avg episode reward: [(0, '12.103')]
+[2024-12-16 17:45:06,334][02760] Saving new best policy, reward=12.103!
+[2024-12-16 17:45:07,286][02773] Updated weights for policy 0, policy_version 590 (0.0049)
+[2024-12-16 17:45:11,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.5, 300 sec: 3929.4). Total num frames: 2433024. Throughput: 0: 956.2. Samples: 606708. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:45:11,331][00620] Avg episode reward: [(0, '12.097')]
+[2024-12-16 17:45:15,737][02773] Updated weights for policy 0, policy_version 600 (0.0013)
+[2024-12-16 17:45:16,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 2457600. Throughput: 0: 1007.9. Samples: 613830. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:16,329][00620] Avg episode reward: [(0, '11.966')]
+[2024-12-16 17:45:21,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 2473984. Throughput: 0: 1002.7. Samples: 619220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:45:21,328][00620] Avg episode reward: [(0, '11.187')]
+[2024-12-16 17:45:26,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 2494464. Throughput: 0: 973.7. Samples: 621504. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:26,333][00620] Avg episode reward: [(0, '11.054')]
+[2024-12-16 17:45:26,919][02773] Updated weights for policy 0, policy_version 610 (0.0018)
+[2024-12-16 17:45:31,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 2519040. Throughput: 0: 997.5. Samples: 628780. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:31,331][00620] Avg episode reward: [(0, '12.192')]
+[2024-12-16 17:45:31,338][02760] Saving new best policy, reward=12.192!
+[2024-12-16 17:45:36,328][00620] Fps is (10 sec: 4095.2, 60 sec: 4027.6, 300 sec: 3929.4). Total num frames: 2535424. Throughput: 0: 1027.0. Samples: 634868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:36,330][00620] Avg episode reward: [(0, '13.304')]
+[2024-12-16 17:45:36,334][02760] Saving new best policy, reward=13.304!
+[2024-12-16 17:45:36,855][02773] Updated weights for policy 0, policy_version 620 (0.0013)
+[2024-12-16 17:45:41,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 2551808. Throughput: 0: 995.5. Samples: 636980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:41,328][00620] Avg episode reward: [(0, '14.417')]
+[2024-12-16 17:45:41,337][02760] Saving new best policy, reward=14.417!
+[2024-12-16 17:45:46,326][00620] Fps is (10 sec: 4096.8, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 2576384. Throughput: 0: 976.5. Samples: 643306. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:46,333][00620] Avg episode reward: [(0, '15.637')]
+[2024-12-16 17:45:46,336][02760] Saving new best policy, reward=15.637!
+[2024-12-16 17:45:47,074][02773] Updated weights for policy 0, policy_version 630 (0.0022)
+[2024-12-16 17:45:51,329][00620] Fps is (10 sec: 4913.5, 60 sec: 4164.0, 300 sec: 3957.1). Total num frames: 2600960. Throughput: 0: 1040.5. Samples: 650468. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:45:51,332][00620] Avg episode reward: [(0, '14.699')]
+[2024-12-16 17:45:51,345][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000635_2600960.pth...
+[2024-12-16 17:45:51,475][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000404_1654784.pth
+[2024-12-16 17:45:56,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 2613248. Throughput: 0: 1019.8. Samples: 652600. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:45:56,334][00620] Avg episode reward: [(0, '14.164')]
+[2024-12-16 17:45:58,400][02773] Updated weights for policy 0, policy_version 640 (0.0025)
+[2024-12-16 17:46:01,326][00620] Fps is (10 sec: 3277.9, 60 sec: 3959.5, 300 sec: 3943.3). Total num frames: 2633728. Throughput: 0: 976.5. Samples: 657774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:46:01,333][00620] Avg episode reward: [(0, '15.104')]
+[2024-12-16 17:46:06,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 2658304. Throughput: 0: 1016.2. Samples: 664948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:46:06,331][00620] Avg episode reward: [(0, '14.076')]
+[2024-12-16 17:46:07,003][02773] Updated weights for policy 0, policy_version 650 (0.0029)
+[2024-12-16 17:46:11,326][00620] Fps is (10 sec: 4095.9, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 2674688. Throughput: 0: 1038.8. Samples: 668252. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:46:11,333][00620] Avg episode reward: [(0, '14.255')]
+[2024-12-16 17:46:16,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3943.3). Total num frames: 2695168. Throughput: 0: 976.7. Samples: 672730. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:46:16,331][00620] Avg episode reward: [(0, '15.884')]
+[2024-12-16 17:46:16,334][02760] Saving new best policy, reward=15.884!
+[2024-12-16 17:46:18,165][02773] Updated weights for policy 0, policy_version 660 (0.0018)
+[2024-12-16 17:46:21,326][00620] Fps is (10 sec: 4096.1, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 2715648. Throughput: 0: 1001.6. Samples: 679938. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-16 17:46:21,332][00620] Avg episode reward: [(0, '16.253')]
+[2024-12-16 17:46:21,340][02760] Saving new best policy, reward=16.253!
+[2024-12-16 17:46:26,326][00620] Fps is (10 sec: 4505.3, 60 sec: 4096.0, 300 sec: 3957.1). Total num frames: 2740224. Throughput: 0: 1034.8. Samples: 683548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:46:26,329][00620] Avg episode reward: [(0, '17.621')]
+[2024-12-16 17:46:26,335][02760] Saving new best policy, reward=17.621!
+[2024-12-16 17:46:27,358][02773] Updated weights for policy 0, policy_version 670 (0.0028)
+[2024-12-16 17:46:31,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 2752512. Throughput: 0: 1003.2. Samples: 688452. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:46:31,332][00620] Avg episode reward: [(0, '17.659')]
+[2024-12-16 17:46:31,346][02760] Saving new best policy, reward=17.659!
+[2024-12-16 17:46:36,326][00620] Fps is (10 sec: 3686.6, 60 sec: 4027.9, 300 sec: 3957.2). Total num frames: 2777088. Throughput: 0: 977.9. Samples: 694472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:46:36,331][00620] Avg episode reward: [(0, '15.943')]
+[2024-12-16 17:46:37,903][02773] Updated weights for policy 0, policy_version 680 (0.0035)
+[2024-12-16 17:46:41,326][00620] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3971.0). Total num frames: 2801664. Throughput: 0: 1011.0. Samples: 698094. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-12-16 17:46:41,328][00620] Avg episode reward: [(0, '16.242')]
+[2024-12-16 17:46:46,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 2818048. Throughput: 0: 1032.0. Samples: 704214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:46:46,330][00620] Avg episode reward: [(0, '16.304')]
+[2024-12-16 17:46:48,798][02773] Updated weights for policy 0, policy_version 690 (0.0016)
+[2024-12-16 17:46:51,326][00620] Fps is (10 sec: 3276.6, 60 sec: 3891.4, 300 sec: 3943.3). Total num frames: 2834432. Throughput: 0: 989.3. Samples: 709466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:46:51,331][00620] Avg episode reward: [(0, '16.913')]
+[2024-12-16 17:46:56,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3971.0). Total num frames: 2859008. Throughput: 0: 998.1. Samples: 713168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:46:56,329][00620] Avg episode reward: [(0, '17.061')]
+[2024-12-16 17:46:57,494][02773] Updated weights for policy 0, policy_version 700 (0.0027)
+[2024-12-16 17:47:01,326][00620] Fps is (10 sec: 4505.8, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 2879488. Throughput: 0: 1054.0. Samples: 720160. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-12-16 17:47:01,333][00620] Avg episode reward: [(0, '17.826')]
+[2024-12-16 17:47:01,347][02760] Saving new best policy, reward=17.826!
+[2024-12-16 17:47:06,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3943.3). Total num frames: 2895872. Throughput: 0: 988.3. Samples: 724412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:47:06,332][00620] Avg episode reward: [(0, '17.499')]
+[2024-12-16 17:47:08,708][02773] Updated weights for policy 0, policy_version 710 (0.0024)
+[2024-12-16 17:47:11,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3984.9). Total num frames: 2920448. Throughput: 0: 983.5. Samples: 727804. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:47:11,334][00620] Avg episode reward: [(0, '17.220')]
+[2024-12-16 17:47:16,326][00620] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3984.9). Total num frames: 2945024. Throughput: 0: 1037.2. Samples: 735124. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-16 17:47:16,332][00620] Avg episode reward: [(0, '17.708')]
+[2024-12-16 17:47:17,644][02773] Updated weights for policy 0, policy_version 720 (0.0026)
+[2024-12-16 17:47:21,326][00620] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 2957312. Throughput: 0: 1013.6. Samples: 740082. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:47:21,331][00620] Avg episode reward: [(0, '18.875')]
+[2024-12-16 17:47:21,345][02760] Saving new best policy, reward=18.875!
+[2024-12-16 17:47:26,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3971.1). Total num frames: 2977792. Throughput: 0: 986.6. Samples: 742492. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:47:26,333][00620] Avg episode reward: [(0, '18.763')]
+[2024-12-16 17:47:28,586][02773] Updated weights for policy 0, policy_version 730 (0.0025)
+[2024-12-16 17:47:31,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3984.9). Total num frames: 3002368. Throughput: 0: 1009.3. Samples: 749632. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:47:31,331][00620] Avg episode reward: [(0, '19.966')]
+[2024-12-16 17:47:31,341][02760] Saving new best policy, reward=19.966!
+[2024-12-16 17:47:36,326][00620] Fps is (10 sec: 4095.8, 60 sec: 4027.7, 300 sec: 3971.0). Total num frames: 3018752. Throughput: 0: 1019.3. Samples: 755336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-16 17:47:36,331][00620] Avg episode reward: [(0, '21.072')]
+[2024-12-16 17:47:36,336][02760] Saving new best policy, reward=21.072!
+[2024-12-16 17:47:40,032][02773] Updated weights for policy 0, policy_version 740 (0.0026)
+[2024-12-16 17:47:41,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3957.2). Total num frames: 3035136. Throughput: 0: 982.9. Samples: 757398. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-16 17:47:41,327][00620] Avg episode reward: [(0, '20.741')]
+[2024-12-16 17:47:46,326][00620] Fps is (10 sec: 4096.2, 60 sec: 4027.7, 300 sec: 3984.9). Total num frames: 3059712. Throughput: 0: 976.0. Samples: 764080.
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-16 17:47:46,330][00620] Avg episode reward: [(0, '20.346')] +[2024-12-16 17:47:48,564][02773] Updated weights for policy 0, policy_version 750 (0.0019) +[2024-12-16 17:47:51,331][00620] Fps is (10 sec: 4503.3, 60 sec: 4095.7, 300 sec: 3984.9). Total num frames: 3080192. Throughput: 0: 1035.8. Samples: 771028. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-16 17:47:51,333][00620] Avg episode reward: [(0, '20.436')] +[2024-12-16 17:47:51,355][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000752_3080192.pth... +[2024-12-16 17:47:51,551][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000519_2125824.pth +[2024-12-16 17:47:56,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3971.0). Total num frames: 3096576. Throughput: 0: 1007.5. Samples: 773142. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:47:56,330][00620] Avg episode reward: [(0, '18.329')] +[2024-12-16 17:47:59,847][02773] Updated weights for policy 0, policy_version 760 (0.0027) +[2024-12-16 17:48:01,326][00620] Fps is (10 sec: 3688.3, 60 sec: 3959.5, 300 sec: 3984.9). Total num frames: 3117056. Throughput: 0: 971.2. Samples: 778830. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:48:01,335][00620] Avg episode reward: [(0, '16.727')] +[2024-12-16 17:48:06,327][00620] Fps is (10 sec: 4505.1, 60 sec: 4095.9, 300 sec: 3998.8). Total num frames: 3141632. Throughput: 0: 1020.5. Samples: 786004. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:48:06,333][00620] Avg episode reward: [(0, '15.172')] +[2024-12-16 17:48:08,989][02773] Updated weights for policy 0, policy_version 770 (0.0020) +[2024-12-16 17:48:11,326][00620] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3971.0). Total num frames: 3158016. Throughput: 0: 1030.7. Samples: 788872. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-16 17:48:11,328][00620] Avg episode reward: [(0, '15.774')] +[2024-12-16 17:48:16,326][00620] Fps is (10 sec: 3686.8, 60 sec: 3891.2, 300 sec: 3984.9). Total num frames: 3178496. Throughput: 0: 976.3. Samples: 793564. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:48:16,328][00620] Avg episode reward: [(0, '16.732')] +[2024-12-16 17:48:19,611][02773] Updated weights for policy 0, policy_version 780 (0.0027) +[2024-12-16 17:48:21,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 3198976. Throughput: 0: 1011.7. Samples: 800864. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:48:21,328][00620] Avg episode reward: [(0, '18.355')] +[2024-12-16 17:48:26,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3998.8). Total num frames: 3223552. Throughput: 0: 1046.4. Samples: 804488. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:48:26,329][00620] Avg episode reward: [(0, '19.795')] +[2024-12-16 17:48:30,385][02773] Updated weights for policy 0, policy_version 790 (0.0026) +[2024-12-16 17:48:31,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3971.0). Total num frames: 3235840. Throughput: 0: 996.8. Samples: 808934. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-16 17:48:31,328][00620] Avg episode reward: [(0, '20.144')] +[2024-12-16 17:48:36,326][00620] Fps is (10 sec: 3686.4, 60 sec: 4027.8, 300 sec: 3998.8). Total num frames: 3260416. Throughput: 0: 988.3. Samples: 815496. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-16 17:48:36,329][00620] Avg episode reward: [(0, '19.699')] +[2024-12-16 17:48:39,477][02773] Updated weights for policy 0, policy_version 800 (0.0025) +[2024-12-16 17:48:41,326][00620] Fps is (10 sec: 4915.1, 60 sec: 4164.3, 300 sec: 3998.8). Total num frames: 3284992. Throughput: 0: 1020.2. Samples: 819052. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-16 17:48:41,328][00620] Avg episode reward: [(0, '19.862')] +[2024-12-16 17:48:46,326][00620] Fps is (10 sec: 3686.1, 60 sec: 3959.4, 300 sec: 3971.0). Total num frames: 3297280. Throughput: 0: 1017.6. Samples: 824624. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-12-16 17:48:46,338][00620] Avg episode reward: [(0, '19.754')] +[2024-12-16 17:48:50,672][02773] Updated weights for policy 0, policy_version 810 (0.0030) +[2024-12-16 17:48:51,326][00620] Fps is (10 sec: 3276.9, 60 sec: 3959.8, 300 sec: 3985.0). Total num frames: 3317760. Throughput: 0: 985.8. Samples: 830366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-16 17:48:51,331][00620] Avg episode reward: [(0, '20.274')] +[2024-12-16 17:48:56,326][00620] Fps is (10 sec: 4505.9, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 3342336. Throughput: 0: 1004.8. Samples: 834086. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:48:56,331][00620] Avg episode reward: [(0, '21.497')] +[2024-12-16 17:48:56,381][02760] Saving new best policy, reward=21.497! +[2024-12-16 17:48:59,398][02773] Updated weights for policy 0, policy_version 820 (0.0021) +[2024-12-16 17:49:01,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3998.8). Total num frames: 3362816. Throughput: 0: 1043.7. Samples: 840532. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:49:01,329][00620] Avg episode reward: [(0, '22.302')] +[2024-12-16 17:49:01,343][02760] Saving new best policy, reward=22.302! +[2024-12-16 17:49:06,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3998.9). Total num frames: 3379200. Throughput: 0: 983.2. Samples: 845108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:49:06,327][00620] Avg episode reward: [(0, '22.193')] +[2024-12-16 17:49:10,356][02773] Updated weights for policy 0, policy_version 830 (0.0032) +[2024-12-16 17:49:11,326][00620] Fps is (10 sec: 4095.9, 60 sec: 4096.0, 300 sec: 4026.6). Total num frames: 3403776. Throughput: 0: 983.2. Samples: 848732. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:11,328][00620] Avg episode reward: [(0, '23.252')] +[2024-12-16 17:49:11,338][02760] Saving new best policy, reward=23.252! +[2024-12-16 17:49:16,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 3424256. Throughput: 0: 1048.1. Samples: 856100. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:16,335][00620] Avg episode reward: [(0, '22.559')] +[2024-12-16 17:49:20,905][02773] Updated weights for policy 0, policy_version 840 (0.0020) +[2024-12-16 17:49:21,326][00620] Fps is (10 sec: 3686.5, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 3440640. Throughput: 0: 1006.1. Samples: 860772. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:21,329][00620] Avg episode reward: [(0, '21.599')] +[2024-12-16 17:49:26,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 3461120. Throughput: 0: 991.3. Samples: 863660. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:26,329][00620] Avg episode reward: [(0, '18.889')] +[2024-12-16 17:49:29,877][02773] Updated weights for policy 0, policy_version 850 (0.0013) +[2024-12-16 17:49:31,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 4040.5). Total num frames: 3485696. Throughput: 0: 1031.2. Samples: 871026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:49:31,330][00620] Avg episode reward: [(0, '18.999')] +[2024-12-16 17:49:36,328][00620] Fps is (10 sec: 4095.2, 60 sec: 4027.6, 300 sec: 4012.7). Total num frames: 3502080. Throughput: 0: 1025.1. Samples: 876498. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:36,332][00620] Avg episode reward: [(0, '17.958')] +[2024-12-16 17:49:41,143][02773] Updated weights for policy 0, policy_version 860 (0.0028) +[2024-12-16 17:49:41,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 3522560. Throughput: 0: 991.4. Samples: 878698. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:49:41,328][00620] Avg episode reward: [(0, '17.587')] +[2024-12-16 17:49:46,326][00620] Fps is (10 sec: 4506.5, 60 sec: 4164.3, 300 sec: 4054.3). Total num frames: 3547136. Throughput: 0: 1006.8. Samples: 885838. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:46,328][00620] Avg episode reward: [(0, '19.701')] +[2024-12-16 17:49:49,548][02773] Updated weights for policy 0, policy_version 870 (0.0019) +[2024-12-16 17:49:51,331][00620] Fps is (10 sec: 4503.3, 60 sec: 4163.9, 300 sec: 4040.4). Total num frames: 3567616. Throughput: 0: 1054.1. Samples: 892548. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:51,333][00620] Avg episode reward: [(0, '19.987')] +[2024-12-16 17:49:51,346][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000871_3567616.pth... +[2024-12-16 17:49:51,529][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000635_2600960.pth +[2024-12-16 17:49:56,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 4012.7). Total num frames: 3579904. Throughput: 0: 1020.7. Samples: 894662. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:49:56,328][00620] Avg episode reward: [(0, '20.286')] +[2024-12-16 17:50:00,895][02773] Updated weights for policy 0, policy_version 880 (0.0030) +[2024-12-16 17:50:01,326][00620] Fps is (10 sec: 3688.3, 60 sec: 4027.7, 300 sec: 4040.5). Total num frames: 3604480. Throughput: 0: 989.9. Samples: 900646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:50:01,328][00620] Avg episode reward: [(0, '21.706')] +[2024-12-16 17:50:06,326][00620] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 4054.3). Total num frames: 3629056. Throughput: 0: 1046.2. Samples: 907852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:50:06,331][00620] Avg episode reward: [(0, '22.206')] +[2024-12-16 17:50:11,214][02773] Updated weights for policy 0, policy_version 890 (0.0029) +[2024-12-16 17:50:11,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 4026.6). Total num frames: 3645440. Throughput: 0: 1036.0. Samples: 910282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:50:11,331][00620] Avg episode reward: [(0, '21.689')] +[2024-12-16 17:50:16,326][00620] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 4040.5). Total num frames: 3665920. Throughput: 0: 986.4. Samples: 915414. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:50:16,328][00620] Avg episode reward: [(0, '22.142')] +[2024-12-16 17:50:20,357][02773] Updated weights for policy 0, policy_version 900 (0.0016) +[2024-12-16 17:50:21,326][00620] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 4054.3). Total num frames: 3690496. Throughput: 0: 1029.8. Samples: 922836. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:50:21,328][00620] Avg episode reward: [(0, '22.508')] +[2024-12-16 17:50:26,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 4026.6). Total num frames: 3706880. Throughput: 0: 1049.4. Samples: 925920. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-16 17:50:26,330][00620] Avg episode reward: [(0, '20.926')] +[2024-12-16 17:50:31,326][00620] Fps is (10 sec: 3276.7, 60 sec: 3959.4, 300 sec: 4026.6). Total num frames: 3723264. Throughput: 0: 989.3. Samples: 930358. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-16 17:50:31,333][00620] Avg episode reward: [(0, '20.943')] +[2024-12-16 17:50:31,821][02773] Updated weights for policy 0, policy_version 910 (0.0034) +[2024-12-16 17:50:36,326][00620] Fps is (10 sec: 3686.4, 60 sec: 4027.9, 300 sec: 4040.5). Total num frames: 3743744. Throughput: 0: 984.2. Samples: 936834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:50:36,327][00620] Avg episode reward: [(0, '22.256')] +[2024-12-16 17:50:40,721][02773] Updated weights for policy 0, policy_version 920 (0.0014) +[2024-12-16 17:50:41,326][00620] Fps is (10 sec: 4505.7, 60 sec: 4096.0, 300 sec: 4040.5). Total num frames: 3768320. Throughput: 0: 1018.0. Samples: 940470. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:50:41,334][00620] Avg episode reward: [(0, '21.123')] +[2024-12-16 17:50:46,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3998.9). Total num frames: 3780608. Throughput: 0: 999.9. Samples: 945642. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:50:46,330][00620] Avg episode reward: [(0, '20.466')] +[2024-12-16 17:50:51,326][00620] Fps is (10 sec: 3686.4, 60 sec: 3959.8, 300 sec: 4040.5). Total num frames: 3805184. Throughput: 0: 972.0. Samples: 951590. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:50:51,331][00620] Avg episode reward: [(0, '19.681')] +[2024-12-16 17:50:51,965][02773] Updated weights for policy 0, policy_version 930 (0.0019) +[2024-12-16 17:50:56,326][00620] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 4054.3). Total num frames: 3829760. Throughput: 0: 996.2. Samples: 955112. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-16 17:50:56,330][00620] Avg episode reward: [(0, '17.861')] +[2024-12-16 17:51:01,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 4026.6). Total num frames: 3846144. Throughput: 0: 1019.0. Samples: 961270. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-16 17:51:01,332][00620] Avg episode reward: [(0, '16.644')] +[2024-12-16 17:51:02,651][02773] Updated weights for policy 0, policy_version 940 (0.0026) +[2024-12-16 17:51:06,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 4026.6). Total num frames: 3862528. Throughput: 0: 962.5. Samples: 966148. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-16 17:51:06,335][00620] Avg episode reward: [(0, '18.436')] +[2024-12-16 17:51:11,326][00620] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 4040.5). Total num frames: 3887104. Throughput: 0: 971.9. Samples: 969654. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-16 17:51:11,327][00620] Avg episode reward: [(0, '20.501')] +[2024-12-16 17:51:12,099][02773] Updated weights for policy 0, policy_version 950 (0.0013) +[2024-12-16 17:51:16,332][00620] Fps is (10 sec: 4502.9, 60 sec: 4027.3, 300 sec: 4040.4). Total num frames: 3907584. Throughput: 0: 1029.3. Samples: 976682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:51:16,336][00620] Avg episode reward: [(0, '20.755')] +[2024-12-16 17:51:21,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3998.8). Total num frames: 3919872. Throughput: 0: 975.2. Samples: 980720. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-16 17:51:21,332][00620] Avg episode reward: [(0, '21.945')] +[2024-12-16 17:51:25,279][02773] Updated weights for policy 0, policy_version 960 (0.0016) +[2024-12-16 17:51:26,326][00620] Fps is (10 sec: 2458.9, 60 sec: 3754.6, 300 sec: 3998.8). Total num frames: 3932160. Throughput: 0: 935.7. Samples: 982576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-16 17:51:26,329][00620] Avg episode reward: [(0, '23.298')] +[2024-12-16 17:51:26,331][02760] Saving new best policy, reward=23.298! +[2024-12-16 17:51:31,326][00620] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3984.9). Total num frames: 3952640. Throughput: 0: 934.0. Samples: 987674. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:51:31,333][00620] Avg episode reward: [(0, '23.455')] +[2024-12-16 17:51:31,344][02760] Saving new best policy, reward=23.455! +[2024-12-16 17:51:36,329][00620] Fps is (10 sec: 3686.1, 60 sec: 3754.6, 300 sec: 3957.1). Total num frames: 3969024. Throughput: 0: 921.6. Samples: 993062. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-16 17:51:36,332][00620] Avg episode reward: [(0, '22.148')] +[2024-12-16 17:51:36,797][02773] Updated weights for policy 0, policy_version 970 (0.0021) +[2024-12-16 17:51:41,333][00620] Fps is (10 sec: 3683.9, 60 sec: 3686.0, 300 sec: 3970.9). Total num frames: 3989504. Throughput: 0: 892.8. Samples: 995292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-16 17:51:41,336][00620] Avg episode reward: [(0, '21.761')] +[2024-12-16 17:51:44,746][02760] Stopping Batcher_0... +[2024-12-16 17:51:44,746][02760] Loop batcher_evt_loop terminating... +[2024-12-16 17:51:44,748][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-12-16 17:51:44,747][00620] Component Batcher_0 stopped! +[2024-12-16 17:51:44,813][02773] Weights refcount: 2 0 +[2024-12-16 17:51:44,817][00620] Component InferenceWorker_p0-w0 stopped! +[2024-12-16 17:51:44,824][02773] Stopping InferenceWorker_p0-w0... +[2024-12-16 17:51:44,825][02773] Loop inference_proc0-0_evt_loop terminating... +[2024-12-16 17:51:44,863][02760] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000752_3080192.pth +[2024-12-16 17:51:44,881][02760] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-12-16 17:51:45,052][02760] Stopping LearnerWorker_p0... +[2024-12-16 17:51:45,058][02760] Loop learner_proc0_evt_loop terminating... +[2024-12-16 17:51:45,053][00620] Component LearnerWorker_p0 stopped! +[2024-12-16 17:51:45,160][00620] Component RolloutWorker_w7 stopped! +[2024-12-16 17:51:45,161][02780] Stopping RolloutWorker_w7... +[2024-12-16 17:51:45,169][02780] Loop rollout_proc7_evt_loop terminating... +[2024-12-16 17:51:45,175][00620] Component RolloutWorker_w1 stopped! 
+[2024-12-16 17:51:45,180][02775] Stopping RolloutWorker_w1...
+[2024-12-16 17:51:45,187][02775] Loop rollout_proc1_evt_loop terminating...
+[2024-12-16 17:51:45,201][00620] Component RolloutWorker_w5 stopped!
+[2024-12-16 17:51:45,205][02781] Stopping RolloutWorker_w5...
+[2024-12-16 17:51:45,210][00620] Component RolloutWorker_w3 stopped!
+[2024-12-16 17:51:45,219][02777] Stopping RolloutWorker_w3...
+[2024-12-16 17:51:45,220][02777] Loop rollout_proc3_evt_loop terminating...
+[2024-12-16 17:51:45,221][02781] Loop rollout_proc5_evt_loop terminating...
+[2024-12-16 17:51:45,235][02776] Stopping RolloutWorker_w2...
+[2024-12-16 17:51:45,237][02776] Loop rollout_proc2_evt_loop terminating...
+[2024-12-16 17:51:45,235][00620] Component RolloutWorker_w2 stopped!
+[2024-12-16 17:51:45,241][00620] Component RolloutWorker_w0 stopped!
+[2024-12-16 17:51:45,241][02774] Stopping RolloutWorker_w0...
+[2024-12-16 17:51:45,245][02774] Loop rollout_proc0_evt_loop terminating...
+[2024-12-16 17:51:45,264][02778] Stopping RolloutWorker_w4...
+[2024-12-16 17:51:45,263][00620] Component RolloutWorker_w4 stopped!
+[2024-12-16 17:51:45,269][00620] Component RolloutWorker_w6 stopped!
+[2024-12-16 17:51:45,272][00620] Waiting for process learner_proc0 to stop...
+[2024-12-16 17:51:45,274][02779] Stopping RolloutWorker_w6...
+[2024-12-16 17:51:45,277][02778] Loop rollout_proc4_evt_loop terminating...
+[2024-12-16 17:51:45,275][02779] Loop rollout_proc6_evt_loop terminating...
+[2024-12-16 17:51:46,676][00620] Waiting for process inference_proc0-0 to join...
+[2024-12-16 17:51:46,682][00620] Waiting for process rollout_proc0 to join...
+[2024-12-16 17:51:48,663][00620] Waiting for process rollout_proc1 to join...
+[2024-12-16 17:51:48,668][00620] Waiting for process rollout_proc2 to join...
+[2024-12-16 17:51:48,674][00620] Waiting for process rollout_proc3 to join...
+[2024-12-16 17:51:48,677][00620] Waiting for process rollout_proc4 to join...
+[2024-12-16 17:51:48,680][00620] Waiting for process rollout_proc5 to join...
+[2024-12-16 17:51:48,685][00620] Waiting for process rollout_proc6 to join...
+[2024-12-16 17:51:48,688][00620] Waiting for process rollout_proc7 to join...
+[2024-12-16 17:51:48,691][00620] Batcher 0 profile tree view:
+batching: 26.6708, releasing_batches: 0.0277
+[2024-12-16 17:51:48,693][00620] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0039
+  wait_policy_total: 408.3329
+update_model: 8.7039
+  weight_update: 0.0023
+one_step: 0.0024
+  handle_policy_step: 578.9128
+    deserialize: 14.7546, stack: 3.1644, obs_to_device_normalize: 123.2854, forward: 289.9513, send_messages: 28.4268
+    prepare_outputs: 90.1056
+      to_cpu: 54.4302
+[2024-12-16 17:51:48,695][00620] Learner 0 profile tree view:
+misc: 0.0051, prepare_batch: 13.4180
+train: 73.0813
+  epoch_init: 0.0068, minibatch_init: 0.0087, losses_postprocess: 0.6702, kl_divergence: 0.6730, after_optimizer: 33.7727
+  calculate_losses: 25.5520
+    losses_init: 0.0035, forward_head: 1.3109, bptt_initial: 17.1198, tail: 1.0581, advantages_returns: 0.2594, losses: 3.5608
+    bptt: 1.9495
+      bptt_forward_core: 1.8432
+  update: 11.7980
+    clip: 0.9139
+[2024-12-16 17:51:48,696][00620] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.2619, enqueue_policy_requests: 97.0692, env_step: 811.1127, overhead: 12.5369, complete_rollouts: 7.8055
+save_policy_outputs: 21.0393
+  split_output_tensors: 8.4012
+[2024-12-16 17:51:48,697][00620] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 0.3383, enqueue_policy_requests: 102.7976, env_step: 810.6583, overhead: 12.5478, complete_rollouts: 6.5921
+save_policy_outputs: 20.0538
+  split_output_tensors: 8.0138
+[2024-12-16 17:51:48,699][00620] Loop Runner_EvtLoop terminating...
+[2024-12-16 17:51:48,700][00620] Runner profile tree view:
+main_loop: 1068.4264
+[2024-12-16 17:51:48,701][00620] Collected {0: 4005888}, FPS: 3749.3
+[2024-12-16 17:51:48,744][00620] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-16 17:51:48,745][00620] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-16 17:51:48,747][00620] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-16 17:51:48,748][00620] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-16 17:51:48,749][00620] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-16 17:51:48,751][00620] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-16 17:51:48,753][00620] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-16 17:51:48,754][00620] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-16 17:51:48,755][00620] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2024-12-16 17:51:48,756][00620] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2024-12-16 17:51:48,758][00620] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-16 17:51:48,759][00620] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-16 17:51:48,760][00620] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-16 17:51:48,761][00620] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-16 17:51:48,762][00620] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-16 17:51:48,801][00620] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-16 17:51:48,804][00620] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-16 17:51:48,807][00620] RunningMeanStd input shape: (1,)
+[2024-12-16 17:51:48,822][00620] ConvEncoder: input_channels=3
+[2024-12-16 17:51:48,947][00620] Conv encoder output size: 512
+[2024-12-16 17:51:48,948][00620] Policy head output size: 512
+[2024-12-16 17:51:49,195][00620] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2024-12-16 17:51:50,040][00620] Num frames 100...
+[2024-12-16 17:51:50,207][00620] Num frames 200...
+[2024-12-16 17:51:50,369][00620] Num frames 300...
+[2024-12-16 17:51:50,537][00620] Num frames 400...
+[2024-12-16 17:51:50,701][00620] Num frames 500...
+[2024-12-16 17:51:50,872][00620] Num frames 600...
+[2024-12-16 17:51:51,051][00620] Num frames 700...
+[2024-12-16 17:51:51,211][00620] Num frames 800...
+[2024-12-16 17:51:51,384][00620] Num frames 900...
+[2024-12-16 17:51:51,559][00620] Avg episode rewards: #0: 19.560, true rewards: #0: 9.560
+[2024-12-16 17:51:51,561][00620] Avg episode reward: 19.560, avg true_objective: 9.560
+[2024-12-16 17:51:51,637][00620] Num frames 1000...
+[2024-12-16 17:51:51,815][00620] Num frames 1100...
+[2024-12-16 17:51:51,990][00620] Num frames 1200...
+[2024-12-16 17:51:52,181][00620] Num frames 1300...
+[2024-12-16 17:51:52,353][00620] Num frames 1400...
+[2024-12-16 17:51:52,479][00620] Num frames 1500...
+[2024-12-16 17:51:52,603][00620] Num frames 1600...
+[2024-12-16 17:51:52,725][00620] Num frames 1700...
+[2024-12-16 17:51:52,851][00620] Num frames 1800...
+[2024-12-16 17:51:52,977][00620] Num frames 1900...
+[2024-12-16 17:51:53,109][00620] Num frames 2000...
+[2024-12-16 17:51:53,232][00620] Num frames 2100...
+[2024-12-16 17:51:53,329][00620] Avg episode rewards: #0: 23.175, true rewards: #0: 10.675
+[2024-12-16 17:51:53,331][00620] Avg episode reward: 23.175, avg true_objective: 10.675
+[2024-12-16 17:51:53,419][00620] Num frames 2200...
+[2024-12-16 17:51:53,552][00620] Num frames 2300...
+[2024-12-16 17:51:53,672][00620] Num frames 2400...
+[2024-12-16 17:51:53,792][00620] Num frames 2500...
+[2024-12-16 17:51:53,871][00620] Avg episode rewards: #0: 16.730, true rewards: #0: 8.397
+[2024-12-16 17:51:53,873][00620] Avg episode reward: 16.730, avg true_objective: 8.397
+[2024-12-16 17:51:53,970][00620] Num frames 2600...
+[2024-12-16 17:51:54,105][00620] Num frames 2700...
+[2024-12-16 17:51:54,228][00620] Num frames 2800...
+[2024-12-16 17:51:54,348][00620] Num frames 2900...
+[2024-12-16 17:51:54,474][00620] Num frames 3000...
+[2024-12-16 17:51:54,601][00620] Num frames 3100...
+[2024-12-16 17:51:54,723][00620] Num frames 3200...
+[2024-12-16 17:51:54,849][00620] Num frames 3300...
+[2024-12-16 17:51:54,975][00620] Num frames 3400...
+[2024-12-16 17:51:55,049][00620] Avg episode rewards: #0: 16.535, true rewards: #0: 8.535
+[2024-12-16 17:51:55,050][00620] Avg episode reward: 16.535, avg true_objective: 8.535
+[2024-12-16 17:51:55,162][00620] Num frames 3500...
+[2024-12-16 17:51:55,279][00620] Num frames 3600...
+[2024-12-16 17:51:55,397][00620] Num frames 3700...
+[2024-12-16 17:51:55,526][00620] Num frames 3800...
+[2024-12-16 17:51:55,698][00620] Avg episode rewards: #0: 15.188, true rewards: #0: 7.788
+[2024-12-16 17:51:55,699][00620] Avg episode reward: 15.188, avg true_objective: 7.788
+[2024-12-16 17:51:55,709][00620] Num frames 3900...
+[2024-12-16 17:51:55,826][00620] Num frames 4000...
+[2024-12-16 17:51:55,947][00620] Num frames 4100...
+[2024-12-16 17:51:56,064][00620] Num frames 4200...
+[2024-12-16 17:51:56,193][00620] Num frames 4300...
+[2024-12-16 17:51:56,313][00620] Num frames 4400...
+[2024-12-16 17:51:56,440][00620] Num frames 4500...
+[2024-12-16 17:51:56,565][00620] Num frames 4600...
+[2024-12-16 17:51:56,741][00620] Avg episode rewards: #0: 15.490, true rewards: #0: 7.823
+[2024-12-16 17:51:56,743][00620] Avg episode reward: 15.490, avg true_objective: 7.823
+[2024-12-16 17:51:56,753][00620] Num frames 4700...
+[2024-12-16 17:51:56,885][00620] Num frames 4800...
+[2024-12-16 17:51:57,008][00620] Num frames 4900...
+[2024-12-16 17:51:57,132][00620] Num frames 5000...
+[2024-12-16 17:51:57,262][00620] Num frames 5100...
+[2024-12-16 17:51:57,387][00620] Num frames 5200...
+[2024-12-16 17:51:57,519][00620] Num frames 5300...
+[2024-12-16 17:51:57,645][00620] Num frames 5400...
+[2024-12-16 17:51:57,772][00620] Num frames 5500...
+[2024-12-16 17:51:57,913][00620] Num frames 5600...
+[2024-12-16 17:51:58,054][00620] Num frames 5700...
+[2024-12-16 17:51:58,206][00620] Num frames 5800...
+[2024-12-16 17:51:58,330][00620] Num frames 5900...
+[2024-12-16 17:51:58,457][00620] Num frames 6000...
+[2024-12-16 17:51:58,578][00620] Num frames 6100...
+[2024-12-16 17:51:58,674][00620] Avg episode rewards: #0: 18.049, true rewards: #0: 8.763
+[2024-12-16 17:51:58,676][00620] Avg episode reward: 18.049, avg true_objective: 8.763
+[2024-12-16 17:51:58,756][00620] Num frames 6200...
+[2024-12-16 17:51:58,882][00620] Num frames 6300...
+[2024-12-16 17:51:59,001][00620] Num frames 6400...
+[2024-12-16 17:51:59,123][00620] Num frames 6500...
+[2024-12-16 17:51:59,249][00620] Num frames 6600...
+[2024-12-16 17:51:59,374][00620] Num frames 6700...
+[2024-12-16 17:51:59,503][00620] Num frames 6800...
+[2024-12-16 17:51:59,624][00620] Num frames 6900...
+[2024-12-16 17:51:59,749][00620] Num frames 7000...
+[2024-12-16 17:51:59,874][00620] Num frames 7100...
+[2024-12-16 17:51:59,997][00620] Num frames 7200...
+[2024-12-16 17:52:00,118][00620] Num frames 7300...
+[2024-12-16 17:52:00,241][00620] Num frames 7400...
+[2024-12-16 17:52:00,374][00620] Num frames 7500...
+[2024-12-16 17:52:00,501][00620] Num frames 7600...
+[2024-12-16 17:52:00,627][00620] Num frames 7700...
+[2024-12-16 17:52:00,753][00620] Num frames 7800...
+[2024-12-16 17:52:00,879][00620] Num frames 7900...
+[2024-12-16 17:52:01,002][00620] Num frames 8000...
+[2024-12-16 17:52:01,124][00620] Num frames 8100...
+[2024-12-16 17:52:01,252][00620] Num frames 8200...
+[2024-12-16 17:52:01,358][00620] Avg episode rewards: #0: 23.042, true rewards: #0: 10.292
+[2024-12-16 17:52:01,359][00620] Avg episode reward: 23.042, avg true_objective: 10.292
+[2024-12-16 17:52:01,441][00620] Num frames 8300...
+[2024-12-16 17:52:01,564][00620] Num frames 8400...
+[2024-12-16 17:52:01,682][00620] Num frames 8500...
+[2024-12-16 17:52:01,803][00620] Num frames 8600...
+[2024-12-16 17:52:01,926][00620] Num frames 8700...
+[2024-12-16 17:52:02,047][00620] Num frames 8800...
+[2024-12-16 17:52:02,168][00620] Num frames 8900...
+[2024-12-16 17:52:02,297][00620] Num frames 9000...
+[2024-12-16 17:52:02,468][00620] Num frames 9100...
+[2024-12-16 17:52:02,637][00620] Num frames 9200...
+[2024-12-16 17:52:02,767][00620] Avg episode rewards: #0: 22.826, true rewards: #0: 10.270
+[2024-12-16 17:52:02,769][00620] Avg episode reward: 22.826, avg true_objective: 10.270
+[2024-12-16 17:52:02,872][00620] Num frames 9300...
+[2024-12-16 17:52:03,038][00620] Num frames 9400...
+[2024-12-16 17:52:03,208][00620] Num frames 9500...
+[2024-12-16 17:52:03,390][00620] Num frames 9600...
+[2024-12-16 17:52:03,556][00620] Num frames 9700...
+[2024-12-16 17:52:03,731][00620] Num frames 9800...
+[2024-12-16 17:52:03,909][00620] Num frames 9900...
+[2024-12-16 17:52:03,995][00620] Avg episode rewards: #0: 22.015, true rewards: #0: 9.915
+[2024-12-16 17:52:03,997][00620] Avg episode reward: 22.015, avg true_objective: 9.915
+[2024-12-16 17:53:00,745][00620] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-12-16 17:53:01,189][00620] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-16 17:53:01,191][00620] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-16 17:53:01,193][00620] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-16 17:53:01,195][00620] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-16 17:53:01,196][00620] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-16 17:53:01,198][00620] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-16 17:53:01,200][00620] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-12-16 17:53:01,202][00620] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-16 17:53:01,202][00620] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-12-16 17:53:01,204][00620] Adding new argument 'hf_repository'='adrian-nf/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-12-16 17:53:01,205][00620] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-16 17:53:01,206][00620] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-16 17:53:01,207][00620] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-16 17:53:01,208][00620] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-16 17:53:01,209][00620] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-16 17:53:01,248][00620] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-16 17:53:01,250][00620] RunningMeanStd input shape: (1,)
+[2024-12-16 17:53:01,268][00620] ConvEncoder: input_channels=3
+[2024-12-16 17:53:01,340][00620] Conv encoder output size: 512
+[2024-12-16 17:53:01,343][00620] Policy head output size: 512
+[2024-12-16 17:53:01,370][00620] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2024-12-16 17:53:01,969][00620] Num frames 100...
+[2024-12-16 17:53:02,129][00620] Num frames 200...
+[2024-12-16 17:53:02,290][00620] Num frames 300...
+[2024-12-16 17:53:02,465][00620] Num frames 400...
+[2024-12-16 17:53:02,624][00620] Num frames 500...
+[2024-12-16 17:53:02,790][00620] Num frames 600...
+[2024-12-16 17:53:02,952][00620] Num frames 700...
+[2024-12-16 17:53:03,104][00620] Num frames 800...
+[2024-12-16 17:53:03,262][00620] Num frames 900...
+[2024-12-16 17:53:03,427][00620] Num frames 1000...
+[2024-12-16 17:53:03,585][00620] Avg episode rewards: #0: 23.560, true rewards: #0: 10.560
+[2024-12-16 17:53:03,587][00620] Avg episode reward: 23.560, avg true_objective: 10.560
+[2024-12-16 17:53:03,655][00620] Num frames 1100...
+[2024-12-16 17:53:03,806][00620] Num frames 1200...
+[2024-12-16 17:53:03,983][00620] Num frames 1300...
+[2024-12-16 17:53:04,153][00620] Num frames 1400...
+[2024-12-16 17:53:04,325][00620] Num frames 1500...
+[2024-12-16 17:53:04,497][00620] Num frames 1600...
+[2024-12-16 17:53:04,677][00620] Num frames 1700...
+[2024-12-16 17:53:04,848][00620] Num frames 1800...
+[2024-12-16 17:53:05,019][00620] Num frames 1900...
+[2024-12-16 17:53:05,206][00620] Num frames 2000...
+[2024-12-16 17:53:05,423][00620] Num frames 2100...
+[2024-12-16 17:53:05,625][00620] Num frames 2200...
+[2024-12-16 17:53:05,803][00620] Avg episode rewards: #0: 25.800, true rewards: #0: 11.300
+[2024-12-16 17:53:05,805][00620] Avg episode reward: 25.800, avg true_objective: 11.300
+[2024-12-16 17:53:05,883][00620] Num frames 2300...
+[2024-12-16 17:53:06,056][00620] Num frames 2400...
+[2024-12-16 17:53:06,233][00620] Num frames 2500...
+[2024-12-16 17:53:06,425][00620] Num frames 2600...
+[2024-12-16 17:53:06,626][00620] Num frames 2700...
+[2024-12-16 17:53:06,816][00620] Num frames 2800...
+[2024-12-16 17:53:07,009][00620] Avg episode rewards: #0: 20.227, true rewards: #0: 9.560
+[2024-12-16 17:53:07,011][00620] Avg episode reward: 20.227, avg true_objective: 9.560
+[2024-12-16 17:53:07,081][00620] Num frames 2900...
+[2024-12-16 17:53:07,277][00620] Num frames 3000...
+[2024-12-16 17:53:07,475][00620] Num frames 3100...
+[2024-12-16 17:53:07,647][00620] Num frames 3200...
+[2024-12-16 17:53:07,842][00620] Num frames 3300...
+[2024-12-16 17:53:08,045][00620] Num frames 3400...
+[2024-12-16 17:53:08,216][00620] Num frames 3500...
+[2024-12-16 17:53:08,293][00620] Avg episode rewards: #0: 19.280, true rewards: #0: 8.780
+[2024-12-16 17:53:08,295][00620] Avg episode reward: 19.280, avg true_objective: 8.780
+[2024-12-16 17:53:08,461][00620] Num frames 3600...
+[2024-12-16 17:53:08,659][00620] Num frames 3700...
+[2024-12-16 17:53:08,877][00620] Num frames 3800...
+[2024-12-16 17:53:09,049][00620] Num frames 3900...
+[2024-12-16 17:53:09,222][00620] Num frames 4000...
+[2024-12-16 17:53:09,442][00620] Avg episode rewards: #0: 17.776, true rewards: #0: 8.176
+[2024-12-16 17:53:09,444][00620] Avg episode reward: 17.776, avg true_objective: 8.176
+[2024-12-16 17:53:09,475][00620] Num frames 4100...
+[2024-12-16 17:53:09,661][00620] Num frames 4200...
+[2024-12-16 17:53:09,844][00620] Num frames 4300...
+[2024-12-16 17:53:10,025][00620] Num frames 4400...
+[2024-12-16 17:53:10,147][00620] Num frames 4500...
+[2024-12-16 17:53:10,274][00620] Num frames 4600...
+[2024-12-16 17:53:10,404][00620] Num frames 4700...
+[2024-12-16 17:53:10,548][00620] Num frames 4800...
+[2024-12-16 17:53:10,673][00620] Num frames 4900...
+[2024-12-16 17:53:10,804][00620] Num frames 5000...
+[2024-12-16 17:53:10,945][00620] Num frames 5100...
+[2024-12-16 17:53:11,068][00620] Num frames 5200...
+[2024-12-16 17:53:11,193][00620] Num frames 5300...
+[2024-12-16 17:53:11,318][00620] Num frames 5400...
+[2024-12-16 17:53:11,452][00620] Num frames 5500...
+[2024-12-16 17:53:11,596][00620] Num frames 5600...
+[2024-12-16 17:53:11,720][00620] Num frames 5700...
+[2024-12-16 17:53:11,846][00620] Num frames 5800...
+[2024-12-16 17:53:11,983][00620] Num frames 5900...
+[2024-12-16 17:53:12,111][00620] Num frames 6000...
+[2024-12-16 17:53:12,237][00620] Num frames 6100...
+[2024-12-16 17:53:12,404][00620] Avg episode rewards: #0: 22.980, true rewards: #0: 10.313
+[2024-12-16 17:53:12,405][00620] Avg episode reward: 22.980, avg true_objective: 10.313
+[2024-12-16 17:53:12,425][00620] Num frames 6200...
+[2024-12-16 17:53:12,577][00620] Num frames 6300...
+[2024-12-16 17:53:12,716][00620] Num frames 6400...
+[2024-12-16 17:53:12,840][00620] Num frames 6500...
+[2024-12-16 17:53:12,977][00620] Num frames 6600...
+[2024-12-16 17:53:13,101][00620] Num frames 6700...
+[2024-12-16 17:53:13,227][00620] Num frames 6800...
+[2024-12-16 17:53:13,351][00620] Num frames 6900...
+[2024-12-16 17:53:13,494][00620] Num frames 7000...
+[2024-12-16 17:53:13,635][00620] Num frames 7100...
+[2024-12-16 17:53:13,759][00620] Num frames 7200...
+[2024-12-16 17:53:13,832][00620] Avg episode rewards: #0: 22.731, true rewards: #0: 10.303
+[2024-12-16 17:53:13,833][00620] Avg episode reward: 22.731, avg true_objective: 10.303
+[2024-12-16 17:53:13,951][00620] Num frames 7300...
+[2024-12-16 17:53:14,077][00620] Num frames 7400...
+[2024-12-16 17:53:14,200][00620] Num frames 7500...
+[2024-12-16 17:53:14,321][00620] Num frames 7600...
+[2024-12-16 17:53:14,456][00620] Num frames 7700...
+[2024-12-16 17:53:14,607][00620] Num frames 7800...
+[2024-12-16 17:53:14,734][00620] Num frames 7900...
+[2024-12-16 17:53:14,864][00620] Num frames 8000...
+[2024-12-16 17:53:14,998][00620] Avg episode rewards: #0: 22.072, true rewards: #0: 10.072
+[2024-12-16 17:53:15,000][00620] Avg episode reward: 22.072, avg true_objective: 10.072
+[2024-12-16 17:53:15,054][00620] Num frames 8100...
+[2024-12-16 17:53:15,180][00620] Num frames 8200...
+[2024-12-16 17:53:15,302][00620] Num frames 8300...
+[2024-12-16 17:53:15,432][00620] Num frames 8400...
+[2024-12-16 17:53:15,588][00620] Num frames 8500...
+[2024-12-16 17:53:15,721][00620] Num frames 8600...
+[2024-12-16 17:53:15,853][00620] Num frames 8700...
+[2024-12-16 17:53:15,994][00620] Num frames 8800...
+[2024-12-16 17:53:16,133][00620] Num frames 8900...
+[2024-12-16 17:53:16,262][00620] Num frames 9000...
+[2024-12-16 17:53:16,434][00620] Avg episode rewards: #0: 22.328, true rewards: #0: 10.106
+[2024-12-16 17:53:16,436][00620] Avg episode reward: 22.328, avg true_objective: 10.106
+[2024-12-16 17:53:16,450][00620] Num frames 9100...
+[2024-12-16 17:53:16,604][00620] Num frames 9200...
+[2024-12-16 17:53:16,743][00620] Num frames 9300...
+[2024-12-16 17:53:16,876][00620] Num frames 9400...
+[2024-12-16 17:53:17,014][00620] Num frames 9500...
+[2024-12-16 17:53:17,144][00620] Num frames 9600...
+[2024-12-16 17:53:17,276][00620] Num frames 9700...
+[2024-12-16 17:53:17,407][00620] Num frames 9800...
+[2024-12-16 17:53:17,558][00620] Avg episode rewards: #0: 21.863, true rewards: #0: 9.863
+[2024-12-16 17:53:17,560][00620] Avg episode reward: 21.863, avg true_objective: 9.863
+[2024-12-16 17:54:16,108][00620] Replay video saved to /content/train_dir/default_experiment/replay.mp4!