diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1782,3 +1782,1220 @@ main_loop: 39.1793 [2024-09-05 09:25:02,001][00556] Avg episode rewards: #0: 18.120, true rewards: #0: 8.820 [2024-09-05 09:25:02,002][00556] Avg episode reward: 18.120, avg true_objective: 8.820 [2024-09-05 09:25:58,542][00556] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-05 09:26:04,429][00556] The model has been pushed to https://huggingface.co/neeldevenshah/rl_course_vizdoom_health_gathering_supreme +[2024-09-05 09:27:51,830][00556] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-05 09:27:51,831][00556] Overriding arg 'train_for_env_steps' with value 8000000 passed from command line +[2024-09-05 09:27:51,838][00556] Experiment dir /content/train_dir/default_experiment already exists! +[2024-09-05 09:27:51,839][00556] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-09-05 09:27:51,840][00556] Weights and Biases integration disabled +[2024-09-05 09:27:51,845][00556] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2024-09-05 09:27:54,040][00556] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=8000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None 
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
+git_hash=unknown
+git_repo_name=not a git repository
+[2024-09-05 09:27:54,043][00556] Saving configuration to /content/train_dir/default_experiment/config.json...
+[2024-09-05 09:27:54,046][00556] Rollout worker 0 uses device cpu
+[2024-09-05 09:27:54,049][00556] Rollout worker 1 uses device cpu
+[2024-09-05 09:27:54,050][00556] Rollout worker 2 uses device cpu
+[2024-09-05 09:27:54,051][00556] Rollout worker 3 uses device cpu
+[2024-09-05 09:27:54,053][00556] Rollout worker 4 uses device cpu
+[2024-09-05 09:27:54,054][00556] Rollout worker 5 uses device cpu
+[2024-09-05 09:27:54,055][00556] Rollout worker 6 uses device cpu
+[2024-09-05 09:27:54,056][00556] Rollout worker 7 uses device cpu
+[2024-09-05 09:27:54,133][00556] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-09-05 09:27:54,134][00556] InferenceWorker_p0-w0: min num requests: 2
+[2024-09-05 09:27:54,166][00556] Starting all processes...
+[2024-09-05 09:27:54,168][00556] Starting process learner_proc0
+[2024-09-05 09:27:54,216][00556] Starting all processes...
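The resumed run keeps the APPO hyperparameters recorded above; only train_for_env_steps is overridden relative to the stored cli_args (4000000 -> 8000000). As a minimal, self-contained sketch (illustrative Python, not Sample Factory's own code) of how the gamma=0.99 and gae_lambda=0.95 settings above drive generalized advantage estimation over one rollout=32 fragment:

    import numpy as np

    def gae_advantages(rewards, values, last_value, gamma=0.99, lam=0.95):
        """GAE over one rollout fragment, ignoring episode boundaries."""
        advantages = np.zeros_like(rewards)
        gae, next_value = 0.0, last_value
        for t in reversed(range(len(rewards))):
            delta = rewards[t] + gamma * next_value - values[t]  # one-step TD error
            gae = delta + gamma * lam * gae                      # discounted sum of TD errors
            advantages[t] = gae
            next_value = values[t]
        return advantages

    rng = np.random.default_rng(0)
    adv = gae_advantages(rng.normal(size=32), rng.normal(size=32), 0.0)  # rollout=32
    print(adv.shape)  # (32,)

Per the config, the learner assembles batch_size=1024 samples (32 such fragments) per optimizer step and clips the policy update at ppo_clip_ratio=0.1.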
+[2024-09-05 09:27:54,223][00556] Starting process inference_proc0-0
+[2024-09-05 09:27:54,224][00556] Starting process rollout_proc0
+[2024-09-05 09:27:54,228][00556] Starting process rollout_proc1
+[2024-09-05 09:27:54,248][00556] Starting process rollout_proc2
+[2024-09-05 09:27:54,249][00556] Starting process rollout_proc3
+[2024-09-05 09:27:54,249][00556] Starting process rollout_proc4
+[2024-09-05 09:27:54,249][00556] Starting process rollout_proc5
+[2024-09-05 09:27:54,251][00556] Starting process rollout_proc6
+[2024-09-05 09:27:54,251][00556] Starting process rollout_proc7
+[2024-09-05 09:28:10,499][15082] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-09-05 09:28:10,509][15082] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2024-09-05 09:28:10,581][15082] Num visible devices: 1
+[2024-09-05 09:28:10,620][15082] Starting seed is not provided
+[2024-09-05 09:28:10,621][15082] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-09-05 09:28:10,622][15082] Initializing actor-critic model on device cuda:0
+[2024-09-05 09:28:10,623][15082] RunningMeanStd input shape: (3, 72, 128)
+[2024-09-05 09:28:10,625][15082] RunningMeanStd input shape: (1,)
+[2024-09-05 09:28:10,780][15082] ConvEncoder: input_channels=3
+[2024-09-05 09:28:10,961][15101] Worker 5 uses CPU cores [1]
+[2024-09-05 09:28:11,044][15100] Worker 2 uses CPU cores [0]
+[2024-09-05 09:28:11,196][15103] Worker 7 uses CPU cores [1]
+[2024-09-05 09:28:11,217][15095] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-09-05 09:28:11,217][15095] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2024-09-05 09:28:11,266][15098] Worker 3 uses CPU cores [1]
+[2024-09-05 09:28:11,314][15095] Num visible devices: 1
+[2024-09-05 09:28:11,418][15096] Worker 0 uses CPU cores [0]
+[2024-09-05 09:28:11,495][15097] Worker 1 uses CPU cores [1]
+[2024-09-05 09:28:11,508][15099] Worker 4 uses CPU cores [0]
+[2024-09-05 09:28:11,514][15102] Worker 6 uses CPU cores [0]
+[2024-09-05 09:28:11,593][15082] Conv encoder output size: 512
+[2024-09-05 09:28:11,594][15082] Policy head output size: 512
+[2024-09-05 09:28:11,623][15082] Created Actor Critic model with architecture:
+[2024-09-05 09:28:11,624][15082] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2024-09-05 09:28:11,796][15082] Using optimizer
+[2024-09-05 09:28:12,678][15082] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000981_4018176.pth...
+[2024-09-05 09:28:12,732][15082] Loading model from checkpoint
+[2024-09-05 09:28:12,735][15082] Loaded experiment state at self.train_step=981, self.env_steps=4018176
+[2024-09-05 09:28:12,735][15082] Initialized policy 0 weights for model version 981
+[2024-09-05 09:28:12,746][15082] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-09-05 09:28:12,755][15082] LearnerWorker_p0 finished initialization!
+[2024-09-05 09:28:12,989][15095] RunningMeanStd input shape: (3, 72, 128)
+[2024-09-05 09:28:12,991][15095] RunningMeanStd input shape: (1,)
+[2024-09-05 09:28:13,140][15095] ConvEncoder: input_channels=3
+[2024-09-05 09:28:13,412][15095] Conv encoder output size: 512
+[2024-09-05 09:28:13,413][15095] Policy head output size: 512
+[2024-09-05 09:28:13,498][00556] Inference worker 0-0 is ready!
+[2024-09-05 09:28:13,500][00556] All inference workers are ready! Signal rollout workers to start!
+[2024-09-05 09:28:13,914][15096] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:13,888][15099] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:13,931][15102] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:13,959][15103] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:13,982][15098] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:13,898][15097] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:13,994][15101] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:14,032][15100] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-09-05 09:28:14,125][00556] Heartbeat connected on Batcher_0
+[2024-09-05 09:28:14,134][00556] Heartbeat connected on LearnerWorker_p0
+[2024-09-05 09:28:14,182][00556] Heartbeat connected on InferenceWorker_p0-w0
+[2024-09-05 09:28:16,113][15102] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:16,121][15099] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:16,119][15096] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:16,120][15101] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:16,125][15103] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:16,126][15097] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:16,845][00556] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4018176. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-09-05 09:28:17,322][15103] Decorrelating experience for 32 frames...
+[2024-09-05 09:28:17,328][15101] Decorrelating experience for 32 frames...
+[2024-09-05 09:28:17,392][15098] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:17,658][15099] Decorrelating experience for 32 frames...
+[2024-09-05 09:28:17,671][15102] Decorrelating experience for 32 frames...
+[2024-09-05 09:28:17,676][15096] Decorrelating experience for 32 frames...
+[2024-09-05 09:28:17,748][15100] Decorrelating experience for 0 frames...
+[2024-09-05 09:28:18,467][15102] Decorrelating experience for 64 frames...
+[2024-09-05 09:28:18,702][15103] Decorrelating experience for 64 frames...
+[2024-09-05 09:28:18,705][15101] Decorrelating experience for 64 frames...
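The architecture dump above hides the convolution hyperparameters behind RecursiveScriptModule names. The following standalone PyTorch sketch reproduces the same shapes for a (3, 72, 128) observation; the conv filter layout [32, 8, 4], [64, 4, 2], [128, 3, 2] is an assumption based on the encoder_conv_architecture=convnet_simple setting, while the 512-dim encoder output, the GRU(512, 512) core, and the 5-action head are confirmed by the log:

    import torch
    from torch import nn

    class DoomActorCriticSketch(nn.Module):
        """Approximate stand-in for the logged ActorCriticSharedWeights."""
        def __init__(self, num_actions=5, rnn_size=512):
            super().__init__()
            self.conv_head = nn.Sequential(  # assumed convnet_simple filters
                nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ELU(),
                nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
                nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
            )
            with torch.no_grad():  # infer flattened conv size for a 3x72x128 obs
                n = self.conv_head(torch.zeros(1, 3, 72, 128)).flatten(1).shape[1]
            self.mlp = nn.Sequential(nn.Linear(n, 512), nn.ELU())  # "Conv encoder output size: 512"
            self.core = nn.GRU(512, rnn_size)                      # ModelCoreRNN: GRU(512, 512)
            self.critic_linear = nn.Linear(rnn_size, 1)            # value head
            self.action_head = nn.Linear(rnn_size, num_actions)    # 5 discrete actions

        def forward(self, obs, rnn_state=None):
            x = self.mlp(self.conv_head(obs / 255.0).flatten(1))   # obs_scale=255.0
            x, rnn_state = self.core(x.unsqueeze(0), rnn_state)    # single-step sequence
            x = x.squeeze(0)
            return self.action_head(x), self.critic_linear(x), rnn_state

    model = DoomActorCriticSketch()
    logits, value, h = model(torch.zeros(4, 3, 72, 128))
    print(logits.shape, value.shape)  # torch.Size([4, 5]) torch.Size([4, 1])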
+[2024-09-05 09:28:18,884][15097] Decorrelating experience for 32 frames... +[2024-09-05 09:28:19,296][15102] Decorrelating experience for 96 frames... +[2024-09-05 09:28:19,490][00556] Heartbeat connected on RolloutWorker_w6 +[2024-09-05 09:28:20,257][15099] Decorrelating experience for 64 frames... +[2024-09-05 09:28:20,274][15100] Decorrelating experience for 32 frames... +[2024-09-05 09:28:20,598][15103] Decorrelating experience for 96 frames... +[2024-09-05 09:28:20,613][15101] Decorrelating experience for 96 frames... +[2024-09-05 09:28:20,692][15098] Decorrelating experience for 32 frames... +[2024-09-05 09:28:20,948][00556] Heartbeat connected on RolloutWorker_w5 +[2024-09-05 09:28:20,953][00556] Heartbeat connected on RolloutWorker_w7 +[2024-09-05 09:28:21,199][15097] Decorrelating experience for 64 frames... +[2024-09-05 09:28:21,845][00556] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4018176. Throughput: 0: 2.4. Samples: 12. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-05 09:28:21,852][00556] Avg episode reward: [(0, '1.280')] +[2024-09-05 09:28:22,446][15096] Decorrelating experience for 64 frames... +[2024-09-05 09:28:22,587][15099] Decorrelating experience for 96 frames... +[2024-09-05 09:28:22,859][00556] Heartbeat connected on RolloutWorker_w4 +[2024-09-05 09:28:23,035][15100] Decorrelating experience for 64 frames... +[2024-09-05 09:28:25,744][15098] Decorrelating experience for 64 frames... +[2024-09-05 09:28:26,845][00556] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4018176. Throughput: 0: 199.6. Samples: 1996. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-05 09:28:26,848][00556] Avg episode reward: [(0, '6.100')] +[2024-09-05 09:28:27,105][15082] Signal inference workers to stop experience collection... +[2024-09-05 09:28:27,145][15095] InferenceWorker_p0-w0: stopping experience collection +[2024-09-05 09:28:27,966][15096] Decorrelating experience for 96 frames... +[2024-09-05 09:28:28,785][00556] Heartbeat connected on RolloutWorker_w0 +[2024-09-05 09:28:29,293][15082] Signal inference workers to resume experience collection... +[2024-09-05 09:28:29,294][15095] InferenceWorker_p0-w0: resuming experience collection +[2024-09-05 09:28:29,583][15100] Decorrelating experience for 96 frames... +[2024-09-05 09:28:30,851][00556] Heartbeat connected on RolloutWorker_w2 +[2024-09-05 09:28:31,751][15097] Decorrelating experience for 96 frames... +[2024-09-05 09:28:31,845][00556] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 4026368. Throughput: 0: 198.5. Samples: 2978. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-05 09:28:31,848][00556] Avg episode reward: [(0, '5.377')] +[2024-09-05 09:28:32,295][15098] Decorrelating experience for 96 frames... +[2024-09-05 09:28:32,678][00556] Heartbeat connected on RolloutWorker_w1 +[2024-09-05 09:28:33,251][00556] Heartbeat connected on RolloutWorker_w3 +[2024-09-05 09:28:36,846][00556] Fps is (10 sec: 2047.8, 60 sec: 1023.9, 300 sec: 1023.9). Total num frames: 4038656. Throughput: 0: 220.6. Samples: 4412. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:28:36,855][00556] Avg episode reward: [(0, '9.302')] +[2024-09-05 09:28:41,305][15095] Updated weights for policy 0, policy_version 991 (0.0034) +[2024-09-05 09:28:41,845][00556] Fps is (10 sec: 3276.8, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 4059136. Throughput: 0: 395.1. Samples: 9878. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-05 09:28:41,848][00556] Avg episode reward: [(0, '10.941')] +[2024-09-05 09:28:46,845][00556] Fps is (10 sec: 4506.1, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 4083712. Throughput: 0: 555.1. Samples: 16654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:28:46,850][00556] Avg episode reward: [(0, '14.391')] +[2024-09-05 09:28:51,845][00556] Fps is (10 sec: 3686.4, 60 sec: 2223.5, 300 sec: 2223.5). Total num frames: 4096000. Throughput: 0: 539.0. Samples: 18864. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:28:51,849][00556] Avg episode reward: [(0, '16.882')] +[2024-09-05 09:28:52,739][15095] Updated weights for policy 0, policy_version 1001 (0.0022) +[2024-09-05 09:28:56,845][00556] Fps is (10 sec: 2867.2, 60 sec: 2355.2, 300 sec: 2355.2). Total num frames: 4112384. Throughput: 0: 583.6. Samples: 23344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:28:56,852][00556] Avg episode reward: [(0, '18.192')] +[2024-09-05 09:29:01,845][00556] Fps is (10 sec: 4095.9, 60 sec: 2639.6, 300 sec: 2639.6). Total num frames: 4136960. Throughput: 0: 670.4. Samples: 30168. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:29:01,847][00556] Avg episode reward: [(0, '21.429')] +[2024-09-05 09:29:02,576][15095] Updated weights for policy 0, policy_version 1011 (0.0029) +[2024-09-05 09:29:06,845][00556] Fps is (10 sec: 4096.0, 60 sec: 2703.4, 300 sec: 2703.4). Total num frames: 4153344. Throughput: 0: 746.0. Samples: 33582. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:29:06,852][00556] Avg episode reward: [(0, '22.022')] +[2024-09-05 09:29:11,845][00556] Fps is (10 sec: 3276.9, 60 sec: 2755.5, 300 sec: 2755.5). Total num frames: 4169728. Throughput: 0: 795.6. Samples: 37796. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:29:11,847][00556] Avg episode reward: [(0, '24.475')] +[2024-09-05 09:29:11,864][15082] Saving new best policy, reward=24.475! +[2024-09-05 09:29:14,541][15095] Updated weights for policy 0, policy_version 1021 (0.0026) +[2024-09-05 09:29:16,845][00556] Fps is (10 sec: 3686.4, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 4190208. Throughput: 0: 908.7. Samples: 43870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:29:16,848][00556] Avg episode reward: [(0, '22.554')] +[2024-09-05 09:29:21,845][00556] Fps is (10 sec: 4505.5, 60 sec: 3276.8, 300 sec: 3024.7). Total num frames: 4214784. Throughput: 0: 950.9. Samples: 47202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:29:21,848][00556] Avg episode reward: [(0, '21.062')] +[2024-09-05 09:29:23,862][15095] Updated weights for policy 0, policy_version 1031 (0.0015) +[2024-09-05 09:29:26,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 2984.2). Total num frames: 4227072. Throughput: 0: 952.4. Samples: 52734. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:29:26,861][00556] Avg episode reward: [(0, '20.565')] +[2024-09-05 09:29:31,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3058.3). Total num frames: 4247552. Throughput: 0: 912.1. Samples: 57698. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:29:31,847][00556] Avg episode reward: [(0, '20.991')] +[2024-09-05 09:29:35,102][15095] Updated weights for policy 0, policy_version 1041 (0.0052) +[2024-09-05 09:29:36,846][00556] Fps is (10 sec: 4095.6, 60 sec: 3822.9, 300 sec: 3123.2). Total num frames: 4268032. Throughput: 0: 938.8. Samples: 61110. 
Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:29:36,854][00556] Avg episode reward: [(0, '22.126')] +[2024-09-05 09:29:41,845][00556] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3180.4). Total num frames: 4288512. Throughput: 0: 984.8. Samples: 67658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:29:41,850][00556] Avg episode reward: [(0, '21.720')] +[2024-09-05 09:29:46,845][00556] Fps is (10 sec: 3277.1, 60 sec: 3618.1, 300 sec: 3140.3). Total num frames: 4300800. Throughput: 0: 927.8. Samples: 71918. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:29:46,852][00556] Avg episode reward: [(0, '22.858')] +[2024-09-05 09:29:46,987][15095] Updated weights for policy 0, policy_version 1051 (0.0022) +[2024-09-05 09:29:51,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3233.7). Total num frames: 4325376. Throughput: 0: 918.6. Samples: 74918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:29:51,850][00556] Avg episode reward: [(0, '22.899')] +[2024-09-05 09:29:51,860][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001056_4325376.pth... +[2024-09-05 09:29:51,987][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000980_4014080.pth +[2024-09-05 09:29:56,185][15095] Updated weights for policy 0, policy_version 1061 (0.0030) +[2024-09-05 09:29:56,848][00556] Fps is (10 sec: 4504.4, 60 sec: 3891.0, 300 sec: 3276.7). Total num frames: 4345856. Throughput: 0: 975.8. Samples: 81710. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:29:56,853][00556] Avg episode reward: [(0, '23.362')] +[2024-09-05 09:30:01,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3276.8). Total num frames: 4362240. Throughput: 0: 954.2. Samples: 86808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:30:01,854][00556] Avg episode reward: [(0, '22.478')] +[2024-09-05 09:30:06,845][00556] Fps is (10 sec: 3277.7, 60 sec: 3754.7, 300 sec: 3276.8). Total num frames: 4378624. Throughput: 0: 926.3. Samples: 88884. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:30:06,850][00556] Avg episode reward: [(0, '22.636')] +[2024-09-05 09:30:08,110][15095] Updated weights for policy 0, policy_version 1071 (0.0017) +[2024-09-05 09:30:11,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3348.0). Total num frames: 4403200. Throughput: 0: 948.1. Samples: 95398. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:30:11,851][00556] Avg episode reward: [(0, '23.126')] +[2024-09-05 09:30:16,846][00556] Fps is (10 sec: 4095.6, 60 sec: 3822.9, 300 sec: 3345.0). Total num frames: 4419584. Throughput: 0: 977.4. Samples: 101682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:30:16,855][00556] Avg episode reward: [(0, '22.725')] +[2024-09-05 09:30:18,509][15095] Updated weights for policy 0, policy_version 1081 (0.0031) +[2024-09-05 09:30:21,845][00556] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3342.3). Total num frames: 4435968. Throughput: 0: 946.1. Samples: 103684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:30:21,851][00556] Avg episode reward: [(0, '22.778')] +[2024-09-05 09:30:26,845][00556] Fps is (10 sec: 3686.8, 60 sec: 3822.9, 300 sec: 3371.3). Total num frames: 4456448. Throughput: 0: 923.2. Samples: 109204. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:30:26,852][00556] Avg episode reward: [(0, '21.538')] +[2024-09-05 09:30:29,118][15095] Updated weights for policy 0, policy_version 1091 (0.0026) +[2024-09-05 09:30:31,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3428.5). Total num frames: 4481024. Throughput: 0: 984.5. Samples: 116220. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:30:31,852][00556] Avg episode reward: [(0, '21.585')] +[2024-09-05 09:30:36,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3423.1). Total num frames: 4497408. Throughput: 0: 975.6. Samples: 118822. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:30:36,849][00556] Avg episode reward: [(0, '21.406')] +[2024-09-05 09:30:40,892][15095] Updated weights for policy 0, policy_version 1101 (0.0028) +[2024-09-05 09:30:41,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3418.0). Total num frames: 4513792. Throughput: 0: 916.2. Samples: 122938. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:30:41,851][00556] Avg episode reward: [(0, '22.522')] +[2024-09-05 09:30:46,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3440.6). Total num frames: 4534272. Throughput: 0: 957.8. Samples: 129910. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:30:46,847][00556] Avg episode reward: [(0, '21.149')] +[2024-09-05 09:30:49,860][15095] Updated weights for policy 0, policy_version 1111 (0.0032) +[2024-09-05 09:30:51,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3461.8). Total num frames: 4554752. Throughput: 0: 989.0. Samples: 133390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:30:51,847][00556] Avg episode reward: [(0, '21.726')] +[2024-09-05 09:30:56,848][00556] Fps is (10 sec: 3685.3, 60 sec: 3754.6, 300 sec: 3455.9). Total num frames: 4571136. Throughput: 0: 947.9. Samples: 138058. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:30:56,852][00556] Avg episode reward: [(0, '21.112')] +[2024-09-05 09:31:01,517][15095] Updated weights for policy 0, policy_version 1121 (0.0026) +[2024-09-05 09:31:01,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3475.4). Total num frames: 4591616. Throughput: 0: 937.2. Samples: 143856. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:31:01,850][00556] Avg episode reward: [(0, '20.799')] +[2024-09-05 09:31:06,845][00556] Fps is (10 sec: 4097.3, 60 sec: 3891.2, 300 sec: 3493.6). Total num frames: 4612096. Throughput: 0: 969.0. Samples: 147288. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:31:06,847][00556] Avg episode reward: [(0, '20.544')] +[2024-09-05 09:31:11,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3487.4). Total num frames: 4628480. Throughput: 0: 974.1. Samples: 153038. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:31:11,851][00556] Avg episode reward: [(0, '22.286')] +[2024-09-05 09:31:12,181][15095] Updated weights for policy 0, policy_version 1131 (0.0022) +[2024-09-05 09:31:16,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3481.6). Total num frames: 4644864. Throughput: 0: 921.1. Samples: 157670. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:31:16,850][00556] Avg episode reward: [(0, '23.865')] +[2024-09-05 09:31:21,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3520.3). Total num frames: 4669440. Throughput: 0: 941.6. Samples: 161192. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:31:21,852][00556] Avg episode reward: [(0, '25.017')] +[2024-09-05 09:31:21,863][15082] Saving new best policy, reward=25.017! +[2024-09-05 09:31:22,398][15095] Updated weights for policy 0, policy_version 1141 (0.0024) +[2024-09-05 09:31:26,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3535.5). Total num frames: 4689920. Throughput: 0: 1002.4. Samples: 168048. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:31:26,851][00556] Avg episode reward: [(0, '25.733')] +[2024-09-05 09:31:26,855][15082] Saving new best policy, reward=25.733! +[2024-09-05 09:31:31,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3528.9). Total num frames: 4706304. Throughput: 0: 940.9. Samples: 172252. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:31:31,847][00556] Avg episode reward: [(0, '26.322')] +[2024-09-05 09:31:31,865][15082] Saving new best policy, reward=26.322! +[2024-09-05 09:31:34,124][15095] Updated weights for policy 0, policy_version 1151 (0.0044) +[2024-09-05 09:31:36,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3543.0). Total num frames: 4726784. Throughput: 0: 922.5. Samples: 174904. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:31:36,850][00556] Avg episode reward: [(0, '26.651')] +[2024-09-05 09:31:36,853][15082] Saving new best policy, reward=26.651! +[2024-09-05 09:31:41,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3556.5). Total num frames: 4747264. Throughput: 0: 966.7. Samples: 181556. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:31:41,848][00556] Avg episode reward: [(0, '24.023')] +[2024-09-05 09:31:43,480][15095] Updated weights for policy 0, policy_version 1161 (0.0016) +[2024-09-05 09:31:46,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3549.9). Total num frames: 4763648. Throughput: 0: 955.6. Samples: 186860. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:31:46,848][00556] Avg episode reward: [(0, '23.279')] +[2024-09-05 09:31:51,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3543.5). Total num frames: 4780032. Throughput: 0: 926.7. Samples: 188988. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:31:51,847][00556] Avg episode reward: [(0, '22.193')] +[2024-09-05 09:31:51,859][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001167_4780032.pth... +[2024-09-05 09:31:51,985][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000981_4018176.pth +[2024-09-05 09:31:55,419][15095] Updated weights for policy 0, policy_version 1171 (0.0028) +[2024-09-05 09:31:56,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3556.1). Total num frames: 4800512. Throughput: 0: 937.3. Samples: 195218. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:31:56,847][00556] Avg episode reward: [(0, '23.028')] +[2024-09-05 09:32:01,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3568.1). Total num frames: 4820992. Throughput: 0: 980.0. Samples: 201768. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:32:01,849][00556] Avg episode reward: [(0, '22.446')] +[2024-09-05 09:32:06,751][15095] Updated weights for policy 0, policy_version 1181 (0.0024) +[2024-09-05 09:32:06,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3561.7). Total num frames: 4837376. Throughput: 0: 946.4. Samples: 203778. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:32:06,852][00556] Avg episode reward: [(0, '21.130')] +[2024-09-05 09:32:11,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3555.7). Total num frames: 4853760. Throughput: 0: 903.0. Samples: 208684. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:32:11,847][00556] Avg episode reward: [(0, '21.423')] +[2024-09-05 09:32:16,420][15095] Updated weights for policy 0, policy_version 1191 (0.0038) +[2024-09-05 09:32:16,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3584.0). Total num frames: 4878336. Throughput: 0: 964.3. Samples: 215644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:32:16,847][00556] Avg episode reward: [(0, '22.584')] +[2024-09-05 09:32:21,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3577.7). Total num frames: 4894720. Throughput: 0: 971.5. Samples: 218620. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:32:21,852][00556] Avg episode reward: [(0, '22.038')] +[2024-09-05 09:32:26,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3571.7). Total num frames: 4911104. Throughput: 0: 917.7. Samples: 222854. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:32:26,850][00556] Avg episode reward: [(0, '21.314')] +[2024-09-05 09:32:28,347][15095] Updated weights for policy 0, policy_version 1201 (0.0035) +[2024-09-05 09:32:31,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3565.9). Total num frames: 4927488. Throughput: 0: 915.7. Samples: 228066. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:32:31,850][00556] Avg episode reward: [(0, '21.059')] +[2024-09-05 09:32:36,845][00556] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3544.6). Total num frames: 4939776. Throughput: 0: 914.8. Samples: 230154. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:32:36,850][00556] Avg episode reward: [(0, '22.290')] +[2024-09-05 09:32:41,845][00556] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3539.6). Total num frames: 4956160. Throughput: 0: 869.8. Samples: 234360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:32:41,851][00556] Avg episode reward: [(0, '23.813')] +[2024-09-05 09:32:42,951][15095] Updated weights for policy 0, policy_version 1211 (0.0033) +[2024-09-05 09:32:46,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3549.9). Total num frames: 4976640. Throughput: 0: 847.8. Samples: 239918. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:32:46,853][00556] Avg episode reward: [(0, '23.624')] +[2024-09-05 09:32:51,845][00556] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3559.8). Total num frames: 4997120. Throughput: 0: 879.4. Samples: 243350. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:32:51,853][00556] Avg episode reward: [(0, '23.383')] +[2024-09-05 09:32:52,025][15095] Updated weights for policy 0, policy_version 1221 (0.0027) +[2024-09-05 09:32:56,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3569.4). Total num frames: 5017600. Throughput: 0: 906.1. Samples: 249458. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:32:56,849][00556] Avg episode reward: [(0, '22.860')] +[2024-09-05 09:33:01,845][00556] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3549.9). Total num frames: 5029888. Throughput: 0: 849.7. Samples: 253882. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:33:01,848][00556] Avg episode reward: [(0, '23.334')] +[2024-09-05 09:33:03,816][15095] Updated weights for policy 0, policy_version 1231 (0.0027) +[2024-09-05 09:33:06,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3573.4). Total num frames: 5054464. Throughput: 0: 859.9. Samples: 257316. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:33:06,847][00556] Avg episode reward: [(0, '22.053')] +[2024-09-05 09:33:11,845][00556] Fps is (10 sec: 4505.7, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 5074944. Throughput: 0: 916.6. Samples: 264100. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:33:11,850][00556] Avg episode reward: [(0, '21.364')] +[2024-09-05 09:33:13,924][15095] Updated weights for policy 0, policy_version 1241 (0.0031) +[2024-09-05 09:33:16,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 5087232. Throughput: 0: 903.4. Samples: 268720. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-05 09:33:16,854][00556] Avg episode reward: [(0, '20.723')] +[2024-09-05 09:33:21,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 5107712. Throughput: 0: 908.3. Samples: 271028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:33:21,852][00556] Avg episode reward: [(0, '21.677')] +[2024-09-05 09:33:24,848][15095] Updated weights for policy 0, policy_version 1251 (0.0031) +[2024-09-05 09:33:26,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 5132288. Throughput: 0: 969.2. Samples: 277974. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:33:26,851][00556] Avg episode reward: [(0, '23.140')] +[2024-09-05 09:33:31,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 5148672. Throughput: 0: 975.6. Samples: 283822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:33:31,849][00556] Avg episode reward: [(0, '22.439')] +[2024-09-05 09:33:36,817][15095] Updated weights for policy 0, policy_version 1261 (0.0019) +[2024-09-05 09:33:36,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 5165056. Throughput: 0: 944.6. Samples: 285858. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:33:36,855][00556] Avg episode reward: [(0, '23.419')] +[2024-09-05 09:33:41,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 5185536. Throughput: 0: 938.0. Samples: 291666. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:33:41,848][00556] Avg episode reward: [(0, '22.524')] +[2024-09-05 09:33:45,737][15095] Updated weights for policy 0, policy_version 1271 (0.0034) +[2024-09-05 09:33:46,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 5210112. Throughput: 0: 993.0. Samples: 298568. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:33:46,851][00556] Avg episode reward: [(0, '22.333')] +[2024-09-05 09:33:51,845][00556] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 5222400. Throughput: 0: 965.4. Samples: 300758. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:33:51,848][00556] Avg episode reward: [(0, '21.998')] +[2024-09-05 09:33:51,944][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001276_5226496.pth... 
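The checkpoint filenames follow checkpoint_<train_step>_<env_steps>.pth, matching the counters from the earlier "Loaded experiment state" line. With batch_size=1024 samples per update and env_frameskip=4 environment frames per sample (both from the config above), each update advances env_steps by 4096; a quick arithmetic check against the filenames in this log:

    frames_per_update = 1024 * 4  # batch_size * env_frameskip
    for train_step in (981, 1056, 1276, 1498):
        print(train_step, train_step * frames_per_update)
    # 981 -> 4018176, 1056 -> 4325376, 1276 -> 5226496, 1498 -> 6135808,
    # exactly the pairs seen in the checkpoint names above and below.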
+[2024-09-05 09:33:52,166][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001056_4325376.pth +[2024-09-05 09:33:56,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 5242880. Throughput: 0: 917.6. Samples: 305392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:33:56,853][00556] Avg episode reward: [(0, '22.549')] +[2024-09-05 09:33:57,552][15095] Updated weights for policy 0, policy_version 1281 (0.0048) +[2024-09-05 09:34:01,845][00556] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5263360. Throughput: 0: 971.0. Samples: 312414. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:34:01,852][00556] Avg episode reward: [(0, '22.893')] +[2024-09-05 09:34:06,846][00556] Fps is (10 sec: 4095.5, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 5283840. Throughput: 0: 995.0. Samples: 315802. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:34:06,849][00556] Avg episode reward: [(0, '23.235')] +[2024-09-05 09:34:07,491][15095] Updated weights for policy 0, policy_version 1291 (0.0038) +[2024-09-05 09:34:11,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 5296128. Throughput: 0: 933.2. Samples: 319968. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:34:11,848][00556] Avg episode reward: [(0, '23.700')] +[2024-09-05 09:34:16,845][00556] Fps is (10 sec: 3686.8, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 5320704. Throughput: 0: 941.7. Samples: 326200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:34:16,849][00556] Avg episode reward: [(0, '25.317')] +[2024-09-05 09:34:18,311][15095] Updated weights for policy 0, policy_version 1301 (0.0030) +[2024-09-05 09:34:21,845][00556] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 3790.5). Total num frames: 5345280. Throughput: 0: 973.2. Samples: 329654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:34:21,847][00556] Avg episode reward: [(0, '25.854')] +[2024-09-05 09:34:26,848][00556] Fps is (10 sec: 3685.2, 60 sec: 3754.5, 300 sec: 3762.7). Total num frames: 5357568. Throughput: 0: 964.0. Samples: 335050. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:34:26,850][00556] Avg episode reward: [(0, '25.711')] +[2024-09-05 09:34:30,099][15095] Updated weights for policy 0, policy_version 1311 (0.0031) +[2024-09-05 09:34:31,845][00556] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 5373952. Throughput: 0: 925.9. Samples: 340232. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:34:31,847][00556] Avg episode reward: [(0, '26.315')] +[2024-09-05 09:34:36,845][00556] Fps is (10 sec: 4097.3, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5398528. Throughput: 0: 952.1. Samples: 343604. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:34:36,847][00556] Avg episode reward: [(0, '26.038')] +[2024-09-05 09:34:38,735][15095] Updated weights for policy 0, policy_version 1321 (0.0024) +[2024-09-05 09:34:41,848][00556] Fps is (10 sec: 4504.2, 60 sec: 3891.0, 300 sec: 3790.5). Total num frames: 5419008. Throughput: 0: 994.4. Samples: 350142. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:34:41,851][00556] Avg episode reward: [(0, '25.790')] +[2024-09-05 09:34:46,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 5431296. Throughput: 0: 928.4. Samples: 354194. 
Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-09-05 09:34:46,851][00556] Avg episode reward: [(0, '25.458')] +[2024-09-05 09:34:51,053][15095] Updated weights for policy 0, policy_version 1331 (0.0022) +[2024-09-05 09:34:51,845][00556] Fps is (10 sec: 3277.8, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 5451776. Throughput: 0: 917.6. Samples: 357092. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:34:51,847][00556] Avg episode reward: [(0, '24.572')] +[2024-09-05 09:34:56,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 5476352. Throughput: 0: 977.2. Samples: 363944. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:34:56,852][00556] Avg episode reward: [(0, '25.046')] +[2024-09-05 09:35:01,794][15095] Updated weights for policy 0, policy_version 1341 (0.0038) +[2024-09-05 09:35:01,847][00556] Fps is (10 sec: 4095.1, 60 sec: 3822.8, 300 sec: 3776.6). Total num frames: 5492736. Throughput: 0: 952.3. Samples: 369056. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:35:01,850][00556] Avg episode reward: [(0, '25.079')] +[2024-09-05 09:35:06,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 5509120. Throughput: 0: 922.4. Samples: 371160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:35:06,847][00556] Avg episode reward: [(0, '23.864')] +[2024-09-05 09:35:11,845][00556] Fps is (10 sec: 3687.2, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5529600. Throughput: 0: 950.6. Samples: 377824. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:35:11,852][00556] Avg episode reward: [(0, '23.044')] +[2024-09-05 09:35:12,061][15095] Updated weights for policy 0, policy_version 1351 (0.0026) +[2024-09-05 09:35:16,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 5550080. Throughput: 0: 973.2. Samples: 384026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:35:16,849][00556] Avg episode reward: [(0, '21.591')] +[2024-09-05 09:35:21,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 5566464. Throughput: 0: 944.4. Samples: 386102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:35:21,848][00556] Avg episode reward: [(0, '21.257')] +[2024-09-05 09:35:23,847][15095] Updated weights for policy 0, policy_version 1361 (0.0038) +[2024-09-05 09:35:26,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3748.9). Total num frames: 5586944. Throughput: 0: 921.6. Samples: 391610. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:35:26,847][00556] Avg episode reward: [(0, '20.007')] +[2024-09-05 09:35:31,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5607424. Throughput: 0: 985.5. Samples: 398540. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:35:31,847][00556] Avg episode reward: [(0, '20.458')] +[2024-09-05 09:35:32,828][15095] Updated weights for policy 0, policy_version 1371 (0.0027) +[2024-09-05 09:35:36,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 5623808. Throughput: 0: 979.7. Samples: 401178. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:35:36,850][00556] Avg episode reward: [(0, '21.781')] +[2024-09-05 09:35:41,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.6, 300 sec: 3748.9). Total num frames: 5640192. Throughput: 0: 921.6. Samples: 405416. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:35:41,850][00556] Avg episode reward: [(0, '21.869')] +[2024-09-05 09:35:44,821][15095] Updated weights for policy 0, policy_version 1381 (0.0032) +[2024-09-05 09:35:46,845][00556] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5664768. Throughput: 0: 960.8. Samples: 412290. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:35:46,850][00556] Avg episode reward: [(0, '22.499')] +[2024-09-05 09:35:51,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 5685248. Throughput: 0: 993.0. Samples: 415846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:35:51,849][00556] Avg episode reward: [(0, '21.302')] +[2024-09-05 09:35:51,863][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001388_5685248.pth... +[2024-09-05 09:35:52,038][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001167_4780032.pth +[2024-09-05 09:35:55,555][15095] Updated weights for policy 0, policy_version 1391 (0.0026) +[2024-09-05 09:35:56,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 5697536. Throughput: 0: 947.6. Samples: 420468. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-05 09:35:56,849][00556] Avg episode reward: [(0, '20.928')] +[2024-09-05 09:36:01,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3762.8). Total num frames: 5722112. Throughput: 0: 940.9. Samples: 426366. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:36:01,852][00556] Avg episode reward: [(0, '20.765')] +[2024-09-05 09:36:05,354][15095] Updated weights for policy 0, policy_version 1401 (0.0037) +[2024-09-05 09:36:06,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 5742592. Throughput: 0: 969.3. Samples: 429720. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:36:06,853][00556] Avg episode reward: [(0, '21.896')] +[2024-09-05 09:36:11,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 5758976. Throughput: 0: 976.3. Samples: 435544. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-05 09:36:11,849][00556] Avg episode reward: [(0, '21.928')] +[2024-09-05 09:36:16,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 5775360. Throughput: 0: 921.5. Samples: 440006. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-05 09:36:16,847][00556] Avg episode reward: [(0, '23.344')] +[2024-09-05 09:36:17,355][15095] Updated weights for policy 0, policy_version 1411 (0.0037) +[2024-09-05 09:36:21,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5799936. Throughput: 0: 941.9. Samples: 443562. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-05 09:36:21,852][00556] Avg episode reward: [(0, '23.724')] +[2024-09-05 09:36:25,969][15095] Updated weights for policy 0, policy_version 1421 (0.0025) +[2024-09-05 09:36:26,850][00556] Fps is (10 sec: 4503.3, 60 sec: 3890.9, 300 sec: 3776.6). Total num frames: 5820416. Throughput: 0: 1002.8. Samples: 450548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:36:26,853][00556] Avg episode reward: [(0, '24.297')] +[2024-09-05 09:36:31,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 5832704. Throughput: 0: 945.5. Samples: 454836. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:36:31,847][00556] Avg episode reward: [(0, '24.494')] +[2024-09-05 09:36:36,845][00556] Fps is (10 sec: 2868.7, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 5849088. Throughput: 0: 905.8. Samples: 456608. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:36:36,847][00556] Avg episode reward: [(0, '23.334')] +[2024-09-05 09:36:40,439][15095] Updated weights for policy 0, policy_version 1431 (0.0058) +[2024-09-05 09:36:41,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 5865472. Throughput: 0: 909.6. Samples: 461402. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:36:41,848][00556] Avg episode reward: [(0, '20.866')] +[2024-09-05 09:36:46,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 5881856. Throughput: 0: 906.4. Samples: 467154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:36:46,847][00556] Avg episode reward: [(0, '22.262')] +[2024-09-05 09:36:51,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 5898240. Throughput: 0: 880.6. Samples: 469346. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:36:51,848][00556] Avg episode reward: [(0, '22.632')] +[2024-09-05 09:36:52,280][15095] Updated weights for policy 0, policy_version 1441 (0.0030) +[2024-09-05 09:36:56,845][00556] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 5922816. Throughput: 0: 886.4. Samples: 475432. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:36:56,848][00556] Avg episode reward: [(0, '21.695')] +[2024-09-05 09:37:00,946][15095] Updated weights for policy 0, policy_version 1451 (0.0046) +[2024-09-05 09:37:01,845][00556] Fps is (10 sec: 4505.5, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 5943296. Throughput: 0: 940.8. Samples: 482340. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:37:01,848][00556] Avg episode reward: [(0, '22.489')] +[2024-09-05 09:37:06,845][00556] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 5959680. Throughput: 0: 907.9. Samples: 484418. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:37:06,851][00556] Avg episode reward: [(0, '23.388')] +[2024-09-05 09:37:11,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 5976064. Throughput: 0: 865.4. Samples: 489486. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:37:11,852][00556] Avg episode reward: [(0, '25.665')] +[2024-09-05 09:37:12,697][15095] Updated weights for policy 0, policy_version 1461 (0.0034) +[2024-09-05 09:37:16,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 6000640. Throughput: 0: 922.0. Samples: 496326. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:37:16,847][00556] Avg episode reward: [(0, '25.954')] +[2024-09-05 09:37:21,845][00556] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 6017024. Throughput: 0: 951.9. Samples: 499442. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:37:21,848][00556] Avg episode reward: [(0, '25.914')] +[2024-09-05 09:37:23,657][15095] Updated weights for policy 0, policy_version 1471 (0.0018) +[2024-09-05 09:37:26,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3550.2, 300 sec: 3748.9). Total num frames: 6033408. Throughput: 0: 938.3. Samples: 503624. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:37:26,847][00556] Avg episode reward: [(0, '25.897')] +[2024-09-05 09:37:31,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 6057984. Throughput: 0: 960.3. Samples: 510366. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:37:31,851][00556] Avg episode reward: [(0, '24.750')] +[2024-09-05 09:37:33,484][15095] Updated weights for policy 0, policy_version 1481 (0.0038) +[2024-09-05 09:37:36,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3804.4). Total num frames: 6078464. Throughput: 0: 988.7. Samples: 513838. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:37:36,848][00556] Avg episode reward: [(0, '21.766')] +[2024-09-05 09:37:41,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 6090752. Throughput: 0: 962.5. Samples: 518744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:37:41,848][00556] Avg episode reward: [(0, '21.102')] +[2024-09-05 09:37:45,419][15095] Updated weights for policy 0, policy_version 1491 (0.0055) +[2024-09-05 09:37:46,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 6111232. Throughput: 0: 927.4. Samples: 524074. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:37:46,851][00556] Avg episode reward: [(0, '19.691')] +[2024-09-05 09:37:51,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3790.5). Total num frames: 6135808. Throughput: 0: 957.6. Samples: 527512. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-05 09:37:51,852][00556] Avg episode reward: [(0, '20.754')] +[2024-09-05 09:37:51,863][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001498_6135808.pth... +[2024-09-05 09:37:52,015][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001276_5226496.pth +[2024-09-05 09:37:54,970][15095] Updated weights for policy 0, policy_version 1501 (0.0029) +[2024-09-05 09:37:56,851][00556] Fps is (10 sec: 4093.5, 60 sec: 3822.6, 300 sec: 3804.3). Total num frames: 6152192. Throughput: 0: 976.4. Samples: 533432. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:37:56,854][00556] Avg episode reward: [(0, '21.111')] +[2024-09-05 09:38:01,847][00556] Fps is (10 sec: 2866.6, 60 sec: 3686.3, 300 sec: 3762.7). Total num frames: 6164480. Throughput: 0: 921.0. Samples: 537774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:38:01,852][00556] Avg episode reward: [(0, '20.861')] +[2024-09-05 09:38:06,535][15095] Updated weights for policy 0, policy_version 1511 (0.0028) +[2024-09-05 09:38:06,845][00556] Fps is (10 sec: 3688.6, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 6189056. Throughput: 0: 926.8. Samples: 541150. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:38:06,850][00556] Avg episode reward: [(0, '22.873')] +[2024-09-05 09:38:11,845][00556] Fps is (10 sec: 4506.6, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 6209536. Throughput: 0: 985.7. Samples: 547980. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:38:11,848][00556] Avg episode reward: [(0, '23.056')] +[2024-09-05 09:38:16,849][00556] Fps is (10 sec: 3275.5, 60 sec: 3686.2, 300 sec: 3776.6). Total num frames: 6221824. Throughput: 0: 932.6. Samples: 552336. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:38:16,858][00556] Avg episode reward: [(0, '21.617')] +[2024-09-05 09:38:18,392][15095] Updated weights for policy 0, policy_version 1521 (0.0031) +[2024-09-05 09:38:21,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 6242304. Throughput: 0: 911.7. Samples: 554864. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:38:21,847][00556] Avg episode reward: [(0, '21.086')] +[2024-09-05 09:38:26,845][00556] Fps is (10 sec: 4507.3, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 6266880. Throughput: 0: 955.7. Samples: 561750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:38:26,847][00556] Avg episode reward: [(0, '23.852')] +[2024-09-05 09:38:27,349][15095] Updated weights for policy 0, policy_version 1531 (0.0017) +[2024-09-05 09:38:31,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 6283264. Throughput: 0: 960.2. Samples: 567284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:38:31,851][00556] Avg episode reward: [(0, '24.038')] +[2024-09-05 09:38:36,845][00556] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 6299648. Throughput: 0: 930.3. Samples: 569376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:38:36,847][00556] Avg episode reward: [(0, '23.419')] +[2024-09-05 09:38:39,273][15095] Updated weights for policy 0, policy_version 1541 (0.0031) +[2024-09-05 09:38:41,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 6320128. Throughput: 0: 938.3. Samples: 575648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:38:41,847][00556] Avg episode reward: [(0, '23.686')] +[2024-09-05 09:38:46,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 6340608. Throughput: 0: 982.0. Samples: 581960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:38:46,848][00556] Avg episode reward: [(0, '24.265')] +[2024-09-05 09:38:49,899][15095] Updated weights for policy 0, policy_version 1551 (0.0041) +[2024-09-05 09:38:51,850][00556] Fps is (10 sec: 3684.5, 60 sec: 3686.1, 300 sec: 3776.6). Total num frames: 6356992. Throughput: 0: 952.7. Samples: 584026. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:38:51,855][00556] Avg episode reward: [(0, '22.802')] +[2024-09-05 09:38:56,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3755.0, 300 sec: 3776.7). Total num frames: 6377472. Throughput: 0: 913.3. Samples: 589078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:38:56,847][00556] Avg episode reward: [(0, '22.264')] +[2024-09-05 09:39:00,436][15095] Updated weights for policy 0, policy_version 1561 (0.0021) +[2024-09-05 09:39:01,845][00556] Fps is (10 sec: 4098.1, 60 sec: 3891.3, 300 sec: 3776.7). Total num frames: 6397952. Throughput: 0: 966.1. Samples: 595806. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:39:01,848][00556] Avg episode reward: [(0, '22.712')] +[2024-09-05 09:39:06,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 6414336. Throughput: 0: 974.7. Samples: 598724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:39:06,849][00556] Avg episode reward: [(0, '23.303')] +[2024-09-05 09:39:11,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 6430720. Throughput: 0: 915.8. Samples: 602960. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:39:11,852][00556] Avg episode reward: [(0, '22.699')] +[2024-09-05 09:39:12,164][15095] Updated weights for policy 0, policy_version 1571 (0.0042) +[2024-09-05 09:39:16,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.5, 300 sec: 3762.8). Total num frames: 6455296. Throughput: 0: 937.8. Samples: 609484. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:39:16,848][00556] Avg episode reward: [(0, '23.351')] +[2024-09-05 09:39:21,731][15095] Updated weights for policy 0, policy_version 1581 (0.0028) +[2024-09-05 09:39:21,845][00556] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3790.6). Total num frames: 6475776. Throughput: 0: 967.5. Samples: 612914. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:39:21,849][00556] Avg episode reward: [(0, '23.398')] +[2024-09-05 09:39:26,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 6488064. Throughput: 0: 933.2. Samples: 617640. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:39:26,847][00556] Avg episode reward: [(0, '24.256')] +[2024-09-05 09:39:31,845][00556] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 6508544. Throughput: 0: 919.3. Samples: 623330. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:39:31,848][00556] Avg episode reward: [(0, '23.903')] +[2024-09-05 09:39:33,158][15095] Updated weights for policy 0, policy_version 1591 (0.0024) +[2024-09-05 09:39:36,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 6533120. Throughput: 0: 950.8. Samples: 626808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:39:36,848][00556] Avg episode reward: [(0, '25.854')] +[2024-09-05 09:39:41,852][00556] Fps is (10 sec: 4093.3, 60 sec: 3822.5, 300 sec: 3790.4). Total num frames: 6549504. Throughput: 0: 969.0. Samples: 632690. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-05 09:39:41,856][00556] Avg episode reward: [(0, '26.220')] +[2024-09-05 09:39:44,308][15095] Updated weights for policy 0, policy_version 1601 (0.0021) +[2024-09-05 09:39:46,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 6565888. Throughput: 0: 921.1. Samples: 637256. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:39:46,848][00556] Avg episode reward: [(0, '26.206')] +[2024-09-05 09:39:51,845][00556] Fps is (10 sec: 3688.8, 60 sec: 3823.3, 300 sec: 3762.8). Total num frames: 6586368. Throughput: 0: 931.4. Samples: 640636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:39:51,850][00556] Avg episode reward: [(0, '25.493')] +[2024-09-05 09:39:51,861][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001608_6586368.pth... +[2024-09-05 09:39:51,995][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001388_5685248.pth +[2024-09-05 09:39:54,186][15095] Updated weights for policy 0, policy_version 1611 (0.0045) +[2024-09-05 09:39:56,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 6606848. Throughput: 0: 983.5. Samples: 647216. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:39:56,852][00556] Avg episode reward: [(0, '25.076')] +[2024-09-05 09:40:01,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 6619136. Throughput: 0: 932.9. Samples: 651464. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:40:01,851][00556] Avg episode reward: [(0, '25.335')] +[2024-09-05 09:40:06,209][15095] Updated weights for policy 0, policy_version 1621 (0.0020) +[2024-09-05 09:40:06,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 6639616. Throughput: 0: 914.8. Samples: 654078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:40:06,848][00556] Avg episode reward: [(0, '25.535')] +[2024-09-05 09:40:11,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 6664192. Throughput: 0: 960.4. Samples: 660858. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:40:11,847][00556] Avg episode reward: [(0, '25.321')] +[2024-09-05 09:40:16,316][15095] Updated weights for policy 0, policy_version 1631 (0.0030) +[2024-09-05 09:40:16,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 6680576. Throughput: 0: 950.6. Samples: 666108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:40:16,855][00556] Avg episode reward: [(0, '24.456')] +[2024-09-05 09:40:21,846][00556] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 6696960. Throughput: 0: 918.9. Samples: 668160. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-05 09:40:21,848][00556] Avg episode reward: [(0, '23.523')] +[2024-09-05 09:40:26,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 6717440. Throughput: 0: 930.1. Samples: 674540. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-05 09:40:26,848][00556] Avg episode reward: [(0, '23.357')] +[2024-09-05 09:40:27,262][15095] Updated weights for policy 0, policy_version 1641 (0.0029) +[2024-09-05 09:40:31,848][00556] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 6737920. Throughput: 0: 970.7. Samples: 680936. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:40:31,851][00556] Avg episode reward: [(0, '24.016')] +[2024-09-05 09:40:36,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 6750208. Throughput: 0: 933.3. Samples: 682634. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-05 09:40:36,848][00556] Avg episode reward: [(0, '24.016')] +[2024-09-05 09:40:41,325][15095] Updated weights for policy 0, policy_version 1651 (0.0026) +[2024-09-05 09:40:41,845][00556] Fps is (10 sec: 2457.7, 60 sec: 3550.2, 300 sec: 3721.1). Total num frames: 6762496. Throughput: 0: 862.1. Samples: 686010. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-05 09:40:41,848][00556] Avg episode reward: [(0, '22.686')] +[2024-09-05 09:40:46,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 6782976. Throughput: 0: 899.4. Samples: 691938. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-05 09:40:46,847][00556] Avg episode reward: [(0, '24.385')] +[2024-09-05 09:40:50,708][15095] Updated weights for policy 0, policy_version 1661 (0.0031) +[2024-09-05 09:40:51,849][00556] Fps is (10 sec: 4503.9, 60 sec: 3686.2, 300 sec: 3762.7). Total num frames: 6807552. Throughput: 0: 917.8. Samples: 695382. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:40:51,856][00556] Avg episode reward: [(0, '27.244')] +[2024-09-05 09:40:51,867][15082] Saving new best policy, reward=27.244! +[2024-09-05 09:40:56,846][00556] Fps is (10 sec: 3686.0, 60 sec: 3549.8, 300 sec: 3721.1). Total num frames: 6819840. Throughput: 0: 877.9. Samples: 700366. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-05 09:40:56,848][00556] Avg episode reward: [(0, '26.505')] +[2024-09-05 09:41:01,845][00556] Fps is (10 sec: 2868.3, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 6836224. Throughput: 0: 879.3. Samples: 705676. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:41:01,851][00556] Avg episode reward: [(0, '25.589')] +[2024-09-05 09:41:02,901][15095] Updated weights for policy 0, policy_version 1671 (0.0021) +[2024-09-05 09:41:06,845][00556] Fps is (10 sec: 4096.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 6860800. Throughput: 0: 910.0. Samples: 709108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:06,851][00556] Avg episode reward: [(0, '25.582')] +[2024-09-05 09:41:11,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 6877184. Throughput: 0: 903.1. Samples: 715180. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:41:11,850][00556] Avg episode reward: [(0, '25.430')] +[2024-09-05 09:41:13,658][15095] Updated weights for policy 0, policy_version 1681 (0.0020) +[2024-09-05 09:41:16,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 6893568. Throughput: 0: 853.4. Samples: 719338. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:41:16,852][00556] Avg episode reward: [(0, '24.194')] +[2024-09-05 09:41:21,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3707.3). Total num frames: 6914048. Throughput: 0: 888.3. Samples: 722606. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:21,847][00556] Avg episode reward: [(0, '23.539')] +[2024-09-05 09:41:23,899][15095] Updated weights for policy 0, policy_version 1691 (0.0037) +[2024-09-05 09:41:26,850][00556] Fps is (10 sec: 4503.5, 60 sec: 3686.1, 300 sec: 3748.8). Total num frames: 6938624. Throughput: 0: 967.1. Samples: 729532. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:26,852][00556] Avg episode reward: [(0, '24.841')] +[2024-09-05 09:41:31,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 6950912. Throughput: 0: 935.6. Samples: 734040. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:31,850][00556] Avg episode reward: [(0, '23.760')] +[2024-09-05 09:41:35,652][15095] Updated weights for policy 0, policy_version 1701 (0.0031) +[2024-09-05 09:41:36,845][00556] Fps is (10 sec: 3278.3, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 6971392. Throughput: 0: 914.6. Samples: 736536. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:36,850][00556] Avg episode reward: [(0, '24.123')] +[2024-09-05 09:41:41,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3762.8). Total num frames: 6991872. Throughput: 0: 956.1. Samples: 743390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:41,851][00556] Avg episode reward: [(0, '25.368')] +[2024-09-05 09:41:45,111][15095] Updated weights for policy 0, policy_version 1711 (0.0034) +[2024-09-05 09:41:46,846][00556] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 7012352. Throughput: 0: 964.7. Samples: 749086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:41:46,848][00556] Avg episode reward: [(0, '24.280')] +[2024-09-05 09:41:51,845][00556] Fps is (10 sec: 3276.7, 60 sec: 3618.4, 300 sec: 3735.0). Total num frames: 7024640. Throughput: 0: 933.5. Samples: 751116. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:41:51,852][00556] Avg episode reward: [(0, '24.529')] +[2024-09-05 09:41:51,863][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001715_7024640.pth... +[2024-09-05 09:41:51,995][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001498_6135808.pth +[2024-09-05 09:41:56,518][15095] Updated weights for policy 0, policy_version 1721 (0.0036) +[2024-09-05 09:41:56,845][00556] Fps is (10 sec: 3686.6, 60 sec: 3823.0, 300 sec: 3748.9). Total num frames: 7049216. Throughput: 0: 932.9. Samples: 757162. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:41:56,853][00556] Avg episode reward: [(0, '24.539')] +[2024-09-05 09:42:01,845][00556] Fps is (10 sec: 4505.7, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 7069696. Throughput: 0: 993.6. Samples: 764050. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:42:01,851][00556] Avg episode reward: [(0, '24.779')] +[2024-09-05 09:42:06,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 7086080. Throughput: 0: 967.6. Samples: 766146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:42:06,850][00556] Avg episode reward: [(0, '25.192')] +[2024-09-05 09:42:08,005][15095] Updated weights for policy 0, policy_version 1731 (0.0037) +[2024-09-05 09:42:11,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 7102464. Throughput: 0: 921.9. Samples: 771012. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:42:11,852][00556] Avg episode reward: [(0, '25.235')] +[2024-09-05 09:42:16,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 7127040. Throughput: 0: 975.6. Samples: 777944. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:42:16,851][00556] Avg episode reward: [(0, '25.494')] +[2024-09-05 09:42:17,272][15095] Updated weights for policy 0, policy_version 1741 (0.0022) +[2024-09-05 09:42:21,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 7143424. Throughput: 0: 987.6. Samples: 780976. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:42:21,850][00556] Avg episode reward: [(0, '24.848')] +[2024-09-05 09:42:26,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.7, 300 sec: 3735.0). Total num frames: 7159808. Throughput: 0: 926.8. Samples: 785096. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:42:26,851][00556] Avg episode reward: [(0, '25.336')] +[2024-09-05 09:42:29,189][15095] Updated weights for policy 0, policy_version 1751 (0.0029) +[2024-09-05 09:42:31,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 7180288. Throughput: 0: 947.8. Samples: 791736. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:42:31,850][00556] Avg episode reward: [(0, '25.558')] +[2024-09-05 09:42:36,849][00556] Fps is (10 sec: 4503.7, 60 sec: 3891.0, 300 sec: 3776.6). Total num frames: 7204864. Throughput: 0: 978.0. Samples: 795130. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:42:36,852][00556] Avg episode reward: [(0, '25.976')] +[2024-09-05 09:42:39,424][15095] Updated weights for policy 0, policy_version 1761 (0.0031) +[2024-09-05 09:42:41,848][00556] Fps is (10 sec: 3685.5, 60 sec: 3754.5, 300 sec: 3748.8). Total num frames: 7217152. Throughput: 0: 952.5. Samples: 800028. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:42:41,855][00556] Avg episode reward: [(0, '26.972')] +[2024-09-05 09:42:46,845][00556] Fps is (10 sec: 3278.2, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 7237632. Throughput: 0: 920.0. Samples: 805450. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:42:46,851][00556] Avg episode reward: [(0, '26.437')] +[2024-09-05 09:42:50,266][15095] Updated weights for policy 0, policy_version 1771 (0.0029) +[2024-09-05 09:42:51,845][00556] Fps is (10 sec: 4097.0, 60 sec: 3891.2, 300 sec: 3749.0). Total num frames: 7258112. Throughput: 0: 945.3. Samples: 808686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:42:51,847][00556] Avg episode reward: [(0, '27.632')] +[2024-09-05 09:42:51,861][15082] Saving new best policy, reward=27.632! +[2024-09-05 09:42:56,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 7278592. Throughput: 0: 972.4. Samples: 814770. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:42:56,850][00556] Avg episode reward: [(0, '27.761')] +[2024-09-05 09:42:56,854][15082] Saving new best policy, reward=27.761! +[2024-09-05 09:43:01,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 7290880. Throughput: 0: 914.5. Samples: 819096. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:43:01,848][00556] Avg episode reward: [(0, '27.259')] +[2024-09-05 09:43:02,095][15095] Updated weights for policy 0, policy_version 1781 (0.0034) +[2024-09-05 09:43:06,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 7315456. Throughput: 0: 923.2. Samples: 822520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:43:06,849][00556] Avg episode reward: [(0, '26.963')] +[2024-09-05 09:43:11,207][15095] Updated weights for policy 0, policy_version 1791 (0.0034) +[2024-09-05 09:43:11,846][00556] Fps is (10 sec: 4505.4, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 7335936. Throughput: 0: 982.4. Samples: 829304. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:43:11,852][00556] Avg episode reward: [(0, '26.581')] +[2024-09-05 09:43:16,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 7348224. Throughput: 0: 932.3. Samples: 833690. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:43:16,848][00556] Avg episode reward: [(0, '27.727')] +[2024-09-05 09:43:21,846][00556] Fps is (10 sec: 3276.7, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 7368704. Throughput: 0: 909.2. Samples: 836042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:43:21,849][00556] Avg episode reward: [(0, '27.547')] +[2024-09-05 09:43:23,323][15095] Updated weights for policy 0, policy_version 1801 (0.0040) +[2024-09-05 09:43:26,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 7393280. Throughput: 0: 953.7. Samples: 842940. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:43:26,847][00556] Avg episode reward: [(0, '26.212')] +[2024-09-05 09:43:31,848][00556] Fps is (10 sec: 4095.3, 60 sec: 3822.8, 300 sec: 3762.7). Total num frames: 7409664. Throughput: 0: 958.0. Samples: 848562. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:43:31,852][00556] Avg episode reward: [(0, '25.599')] +[2024-09-05 09:43:34,269][15095] Updated weights for policy 0, policy_version 1811 (0.0033) +[2024-09-05 09:43:36,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.7, 300 sec: 3748.9). Total num frames: 7426048. Throughput: 0: 933.4. Samples: 850688. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:43:36,851][00556] Avg episode reward: [(0, '24.550')] +[2024-09-05 09:43:41,845][00556] Fps is (10 sec: 3687.3, 60 sec: 3823.1, 300 sec: 3748.9). Total num frames: 7446528. Throughput: 0: 935.6. Samples: 856874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:43:41,848][00556] Avg episode reward: [(0, '24.875')] +[2024-09-05 09:43:43,923][15095] Updated weights for policy 0, policy_version 1821 (0.0032) +[2024-09-05 09:43:46,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 7471104. Throughput: 0: 991.2. Samples: 863698. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:43:46,848][00556] Avg episode reward: [(0, '25.754')] +[2024-09-05 09:43:51,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 7483392. Throughput: 0: 960.3. Samples: 865734. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:43:51,847][00556] Avg episode reward: [(0, '25.042')] +[2024-09-05 09:43:51,860][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001827_7483392.pth... +[2024-09-05 09:43:52,041][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001608_6586368.pth +[2024-09-05 09:43:55,680][15095] Updated weights for policy 0, policy_version 1831 (0.0043) +[2024-09-05 09:43:56,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 7503872. Throughput: 0: 922.4. Samples: 870810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:43:56,850][00556] Avg episode reward: [(0, '26.128')] +[2024-09-05 09:44:01,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 7524352. Throughput: 0: 977.9. Samples: 877694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:44:01,850][00556] Avg episode reward: [(0, '26.850')] +[2024-09-05 09:44:05,255][15095] Updated weights for policy 0, policy_version 1841 (0.0024) +[2024-09-05 09:44:06,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 7544832. Throughput: 0: 991.7. Samples: 880668. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:44:06,847][00556] Avg episode reward: [(0, '26.168')] +[2024-09-05 09:44:11,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 7557120. Throughput: 0: 931.4. Samples: 884854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:44:11,848][00556] Avg episode reward: [(0, '25.727')] +[2024-09-05 09:44:16,476][15095] Updated weights for policy 0, policy_version 1851 (0.0038) +[2024-09-05 09:44:16,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 7581696. Throughput: 0: 952.9. Samples: 891442. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:44:16,852][00556] Avg episode reward: [(0, '23.619')] +[2024-09-05 09:44:21,845][00556] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3776.6). Total num frames: 7602176. Throughput: 0: 978.9. Samples: 894738. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:44:21,848][00556] Avg episode reward: [(0, '24.323')] +[2024-09-05 09:44:26,846][00556] Fps is (10 sec: 3276.4, 60 sec: 3686.3, 300 sec: 3748.9). Total num frames: 7614464. Throughput: 0: 947.9. Samples: 899532. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:44:26,857][00556] Avg episode reward: [(0, '23.888')] +[2024-09-05 09:44:28,448][15095] Updated weights for policy 0, policy_version 1861 (0.0039) +[2024-09-05 09:44:31,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3735.0). Total num frames: 7634944. Throughput: 0: 920.4. Samples: 905118. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:44:31,851][00556] Avg episode reward: [(0, '25.102')] +[2024-09-05 09:44:36,845][00556] Fps is (10 sec: 3686.8, 60 sec: 3754.7, 300 sec: 3735.1). Total num frames: 7651328. Throughput: 0: 929.8. Samples: 907576. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:44:36,852][00556] Avg episode reward: [(0, '25.657')] +[2024-09-05 09:44:41,298][15095] Updated weights for policy 0, policy_version 1871 (0.0034) +[2024-09-05 09:44:41,847][00556] Fps is (10 sec: 2866.5, 60 sec: 3618.0, 300 sec: 3721.1). Total num frames: 7663616. Throughput: 0: 902.7. Samples: 911434. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:44:41,850][00556] Avg episode reward: [(0, '24.660')] +[2024-09-05 09:44:46,845][00556] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3693.3). Total num frames: 7675904. Throughput: 0: 844.0. Samples: 915676. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:44:46,847][00556] Avg episode reward: [(0, '25.863')] +[2024-09-05 09:44:51,845][00556] Fps is (10 sec: 3687.3, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 7700480. Throughput: 0: 850.8. Samples: 918952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:44:51,848][00556] Avg episode reward: [(0, '26.791')] +[2024-09-05 09:44:52,492][15095] Updated weights for policy 0, policy_version 1881 (0.0033) +[2024-09-05 09:44:56,845][00556] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 7720960. Throughput: 0: 905.0. Samples: 925580. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-05 09:44:56,848][00556] Avg episode reward: [(0, '26.411')] +[2024-09-05 09:45:01,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 7737344. Throughput: 0: 862.4. Samples: 930252. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:01,852][00556] Avg episode reward: [(0, '26.766')] +[2024-09-05 09:45:04,418][15095] Updated weights for policy 0, policy_version 1891 (0.0055) +[2024-09-05 09:45:06,845][00556] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3693.3). Total num frames: 7753728. Throughput: 0: 840.4. Samples: 932554. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:06,851][00556] Avg episode reward: [(0, '26.762')] +[2024-09-05 09:45:11,845][00556] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 7778304. Throughput: 0: 887.2. Samples: 939454. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-05 09:45:11,848][00556] Avg episode reward: [(0, '27.294')] +[2024-09-05 09:45:13,326][15095] Updated weights for policy 0, policy_version 1901 (0.0023) +[2024-09-05 09:45:16,845][00556] Fps is (10 sec: 4096.1, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 7794688. Throughput: 0: 889.6. Samples: 945150. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:16,851][00556] Avg episode reward: [(0, '27.508')] +[2024-09-05 09:45:21,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3707.2). Total num frames: 7811072. Throughput: 0: 881.7. Samples: 947252. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-05 09:45:21,852][00556] Avg episode reward: [(0, '28.033')] +[2024-09-05 09:45:21,864][15082] Saving new best policy, reward=28.033! +[2024-09-05 09:45:25,366][15095] Updated weights for policy 0, policy_version 1911 (0.0024) +[2024-09-05 09:45:26,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3707.2). Total num frames: 7831552. Throughput: 0: 926.0. Samples: 953102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:26,852][00556] Avg episode reward: [(0, '26.995')] +[2024-09-05 09:45:31,845][00556] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 7852032. Throughput: 0: 983.3. Samples: 959924. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:31,851][00556] Avg episode reward: [(0, '26.897')] +[2024-09-05 09:45:36,176][15095] Updated weights for policy 0, policy_version 1921 (0.0027) +[2024-09-05 09:45:36,849][00556] Fps is (10 sec: 3684.9, 60 sec: 3617.9, 300 sec: 3748.8). Total num frames: 7868416. Throughput: 0: 956.1. Samples: 961980. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:36,854][00556] Avg episode reward: [(0, '26.159')] +[2024-09-05 09:45:41,845][00556] Fps is (10 sec: 3686.5, 60 sec: 3754.8, 300 sec: 3748.9). Total num frames: 7888896. Throughput: 0: 916.9. Samples: 966842. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:41,852][00556] Avg episode reward: [(0, '27.208')] +[2024-09-05 09:45:46,242][15095] Updated weights for policy 0, policy_version 1931 (0.0029) +[2024-09-05 09:45:46,845][00556] Fps is (10 sec: 4097.7, 60 sec: 3891.2, 300 sec: 3735.0). Total num frames: 7909376. Throughput: 0: 964.3. Samples: 973646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:46,851][00556] Avg episode reward: [(0, '25.253')] +[2024-09-05 09:45:51,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 7925760. Throughput: 0: 983.7. Samples: 976822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:51,847][00556] Avg episode reward: [(0, '25.697')] +[2024-09-05 09:45:51,865][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001935_7925760.pth... +[2024-09-05 09:45:52,055][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001715_7024640.pth +[2024-09-05 09:45:56,845][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 7942144. Throughput: 0: 920.3. Samples: 980866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:45:56,847][00556] Avg episode reward: [(0, '25.991')] +[2024-09-05 09:45:58,330][15095] Updated weights for policy 0, policy_version 1941 (0.0026) +[2024-09-05 09:46:01,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 7962624. Throughput: 0: 937.1. Samples: 987318. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:46:01,847][00556] Avg episode reward: [(0, '26.448')] +[2024-09-05 09:46:06,845][00556] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 7987200. Throughput: 0: 967.3. Samples: 990780. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-05 09:46:06,847][00556] Avg episode reward: [(0, '27.241')] +[2024-09-05 09:46:07,783][15095] Updated weights for policy 0, policy_version 1951 (0.0022) +[2024-09-05 09:46:11,845][00556] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 7999488. Throughput: 0: 949.2. Samples: 995818. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-05 09:46:11,852][00556] Avg episode reward: [(0, '27.430')] +[2024-09-05 09:46:13,592][15082] Stopping Batcher_0... +[2024-09-05 09:46:13,593][15082] Loop batcher_evt_loop terminating... +[2024-09-05 09:46:13,593][00556] Component Batcher_0 stopped! +[2024-09-05 09:46:13,602][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-05 09:46:13,692][15095] Weights refcount: 2 0 +[2024-09-05 09:46:13,698][15095] Stopping InferenceWorker_p0-w0... +[2024-09-05 09:46:13,698][00556] Component InferenceWorker_p0-w0 stopped! +[2024-09-05 09:46:13,707][15095] Loop inference_proc0-0_evt_loop terminating... +[2024-09-05 09:46:13,758][15082] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001827_7483392.pth +[2024-09-05 09:46:13,777][15082] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-05 09:46:13,998][00556] Component LearnerWorker_p0 stopped! +[2024-09-05 09:46:14,002][15082] Stopping LearnerWorker_p0... +[2024-09-05 09:46:14,003][15082] Loop learner_proc0_evt_loop terminating... +[2024-09-05 09:46:14,097][15102] Stopping RolloutWorker_w6... +[2024-09-05 09:46:14,098][15102] Loop rollout_proc6_evt_loop terminating... +[2024-09-05 09:46:14,097][00556] Component RolloutWorker_w6 stopped! +[2024-09-05 09:46:14,115][00556] Component RolloutWorker_w4 stopped! +[2024-09-05 09:46:14,115][15099] Stopping RolloutWorker_w4... +[2024-09-05 09:46:14,120][15099] Loop rollout_proc4_evt_loop terminating... +[2024-09-05 09:46:14,129][15096] Stopping RolloutWorker_w0... +[2024-09-05 09:46:14,129][00556] Component RolloutWorker_w0 stopped! +[2024-09-05 09:46:14,131][15096] Loop rollout_proc0_evt_loop terminating... +[2024-09-05 09:46:14,185][15100] Stopping RolloutWorker_w2... +[2024-09-05 09:46:14,185][00556] Component RolloutWorker_w2 stopped! +[2024-09-05 09:46:14,186][15100] Loop rollout_proc2_evt_loop terminating... +[2024-09-05 09:46:14,260][00556] Component RolloutWorker_w5 stopped! +[2024-09-05 09:46:14,263][15101] Stopping RolloutWorker_w5... +[2024-09-05 09:46:14,264][15101] Loop rollout_proc5_evt_loop terminating... +[2024-09-05 09:46:14,300][00556] Component RolloutWorker_w1 stopped! +[2024-09-05 09:46:14,305][15097] Stopping RolloutWorker_w1... +[2024-09-05 09:46:14,316][15097] Loop rollout_proc1_evt_loop terminating... +[2024-09-05 09:46:14,319][00556] Component RolloutWorker_w7 stopped! +[2024-09-05 09:46:14,322][15103] Stopping RolloutWorker_w7... +[2024-09-05 09:46:14,332][15103] Loop rollout_proc7_evt_loop terminating... +[2024-09-05 09:46:14,343][00556] Component RolloutWorker_w3 stopped! +[2024-09-05 09:46:14,346][00556] Waiting for process learner_proc0 to stop... +[2024-09-05 09:46:14,349][15098] Stopping RolloutWorker_w3... +[2024-09-05 09:46:14,350][15098] Loop rollout_proc3_evt_loop terminating... +[2024-09-05 09:46:15,506][00556] Waiting for process inference_proc0-0 to join... +[2024-09-05 09:46:15,514][00556] Waiting for process rollout_proc0 to join... +[2024-09-05 09:46:17,511][00556] Waiting for process rollout_proc1 to join... 
+[2024-09-05 09:46:17,604][00556] Waiting for process rollout_proc2 to join... +[2024-09-05 09:46:17,608][00556] Waiting for process rollout_proc3 to join... +[2024-09-05 09:46:17,611][00556] Waiting for process rollout_proc4 to join... +[2024-09-05 09:46:17,616][00556] Waiting for process rollout_proc5 to join... +[2024-09-05 09:46:17,619][00556] Waiting for process rollout_proc6 to join... +[2024-09-05 09:46:17,623][00556] Waiting for process rollout_proc7 to join... +[2024-09-05 09:46:17,627][00556] Batcher 0 profile tree view: +batching: 27.8309, releasing_batches: 0.0254 +[2024-09-05 09:46:17,630][00556] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 377.7299 +update_model: 9.6648 + weight_update: 0.0022 +one_step: 0.0238 + handle_policy_step: 644.3478 + deserialize: 15.4863, stack: 3.3304, obs_to_device_normalize: 130.8669, forward: 340.9284, send_messages: 31.0169 + prepare_outputs: 91.3202 + to_cpu: 53.1027 +[2024-09-05 09:46:17,632][00556] Learner 0 profile tree view: +misc: 0.0061, prepare_batch: 13.8632 +train: 76.2424 + epoch_init: 0.0142, minibatch_init: 0.0131, losses_postprocess: 0.7158, kl_divergence: 0.7868, after_optimizer: 3.3525 + calculate_losses: 27.2501 + losses_init: 0.0039, forward_head: 1.4150, bptt_initial: 18.2483, tail: 1.1865, advantages_returns: 0.2817, losses: 3.8163 + bptt: 1.9909 + bptt_forward_core: 1.9021 + update: 43.4576 + clip: 0.8827 +[2024-09-05 09:46:17,634][00556] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3461, enqueue_policy_requests: 95.0183, env_step: 836.8298, overhead: 13.5378, complete_rollouts: 7.2882 +save_policy_outputs: 22.1033 + split_output_tensors: 8.5073 +[2024-09-05 09:46:17,637][00556] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3294, enqueue_policy_requests: 98.0879, env_step: 835.8569, overhead: 14.6034, complete_rollouts: 6.7993 +save_policy_outputs: 21.8937 + split_output_tensors: 9.0379 +[2024-09-05 09:46:17,638][00556] Loop Runner_EvtLoop terminating... +[2024-09-05 09:46:17,639][00556] Runner profile tree view: +main_loop: 1103.4731 +[2024-09-05 09:46:17,640][00556] Collected {0: 8007680}, FPS: 3615.4 +[2024-09-05 09:46:17,668][00556] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-05 09:46:17,670][00556] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-05 09:46:17,673][00556] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-05 09:46:17,675][00556] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-05 09:46:17,677][00556] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-05 09:46:17,678][00556] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-05 09:46:17,681][00556] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-09-05 09:46:17,684][00556] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-05 09:46:17,685][00556] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-09-05 09:46:17,686][00556] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-09-05 09:46:17,687][00556] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-05 09:46:17,690][00556] Adding new argument 'eval_deterministic'=False that is not in the saved config file! 
+[2024-09-05 09:46:17,691][00556] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-05 09:46:17,693][00556] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-05 09:46:17,694][00556] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-05 09:46:17,733][00556] RunningMeanStd input shape: (3, 72, 128) +[2024-09-05 09:46:17,736][00556] RunningMeanStd input shape: (1,) +[2024-09-05 09:46:17,752][00556] ConvEncoder: input_channels=3 +[2024-09-05 09:46:17,799][00556] Conv encoder output size: 512 +[2024-09-05 09:46:17,801][00556] Policy head output size: 512 +[2024-09-05 09:46:17,824][00556] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-05 09:46:18,262][00556] Num frames 100... +[2024-09-05 09:46:18,386][00556] Num frames 200... +[2024-09-05 09:46:18,519][00556] Num frames 300... +[2024-09-05 09:46:18,642][00556] Num frames 400... +[2024-09-05 09:46:18,770][00556] Num frames 500... +[2024-09-05 09:46:18,893][00556] Num frames 600... +[2024-09-05 09:46:19,017][00556] Num frames 700... +[2024-09-05 09:46:19,143][00556] Num frames 800... +[2024-09-05 09:46:19,272][00556] Num frames 900... +[2024-09-05 09:46:19,397][00556] Num frames 1000... +[2024-09-05 09:46:19,525][00556] Num frames 1100... +[2024-09-05 09:46:19,651][00556] Num frames 1200... +[2024-09-05 09:46:19,781][00556] Num frames 1300... +[2024-09-05 09:46:19,907][00556] Num frames 1400... +[2024-09-05 09:46:20,033][00556] Num frames 1500... +[2024-09-05 09:46:20,122][00556] Avg episode rewards: #0: 36.280, true rewards: #0: 15.280 +[2024-09-05 09:46:20,124][00556] Avg episode reward: 36.280, avg true_objective: 15.280 +[2024-09-05 09:46:20,221][00556] Num frames 1600... +[2024-09-05 09:46:20,358][00556] Num frames 1700... +[2024-09-05 09:46:20,486][00556] Num frames 1800... +[2024-09-05 09:46:20,611][00556] Num frames 1900... +[2024-09-05 09:46:20,781][00556] Num frames 2000... +[2024-09-05 09:46:20,911][00556] Num frames 2100... +[2024-09-05 09:46:21,039][00556] Num frames 2200... +[2024-09-05 09:46:21,163][00556] Num frames 2300... +[2024-09-05 09:46:21,340][00556] Avg episode rewards: #0: 26.960, true rewards: #0: 11.960 +[2024-09-05 09:46:21,342][00556] Avg episode reward: 26.960, avg true_objective: 11.960 +[2024-09-05 09:46:21,356][00556] Num frames 2400... +[2024-09-05 09:46:21,483][00556] Num frames 2500... +[2024-09-05 09:46:21,609][00556] Num frames 2600... +[2024-09-05 09:46:21,740][00556] Num frames 2700... +[2024-09-05 09:46:21,862][00556] Num frames 2800... +[2024-09-05 09:46:21,986][00556] Num frames 2900... +[2024-09-05 09:46:22,114][00556] Num frames 3000... +[2024-09-05 09:46:22,242][00556] Num frames 3100... +[2024-09-05 09:46:22,380][00556] Num frames 3200... +[2024-09-05 09:46:22,504][00556] Num frames 3300... +[2024-09-05 09:46:22,627][00556] Num frames 3400... +[2024-09-05 09:46:22,760][00556] Num frames 3500... +[2024-09-05 09:46:22,880][00556] Num frames 3600... +[2024-09-05 09:46:23,002][00556] Num frames 3700... +[2024-09-05 09:46:23,127][00556] Num frames 3800... +[2024-09-05 09:46:23,253][00556] Num frames 3900... +[2024-09-05 09:46:23,378][00556] Avg episode rewards: #0: 30.830, true rewards: #0: 13.163 +[2024-09-05 09:46:23,380][00556] Avg episode reward: 30.830, avg true_objective: 13.163 +[2024-09-05 09:46:23,441][00556] Num frames 4000... +[2024-09-05 09:46:23,563][00556] Num frames 4100... 
+[2024-09-05 09:46:23,697][00556] Num frames 4200... +[2024-09-05 09:46:23,822][00556] Num frames 4300... +[2024-09-05 09:46:23,982][00556] Num frames 4400... +[2024-09-05 09:46:24,160][00556] Num frames 4500... +[2024-09-05 09:46:24,349][00556] Avg episode rewards: #0: 26.195, true rewards: #0: 11.445 +[2024-09-05 09:46:24,351][00556] Avg episode reward: 26.195, avg true_objective: 11.445 +[2024-09-05 09:46:24,390][00556] Num frames 4600... +[2024-09-05 09:46:24,565][00556] Num frames 4700... +[2024-09-05 09:46:24,738][00556] Num frames 4800... +[2024-09-05 09:46:24,900][00556] Num frames 4900... +[2024-09-05 09:46:25,063][00556] Num frames 5000... +[2024-09-05 09:46:25,242][00556] Num frames 5100... +[2024-09-05 09:46:25,419][00556] Num frames 5200... +[2024-09-05 09:46:25,597][00556] Num frames 5300... +[2024-09-05 09:46:25,771][00556] Num frames 5400... +[2024-09-05 09:46:25,951][00556] Num frames 5500... +[2024-09-05 09:46:26,024][00556] Avg episode rewards: #0: 24.614, true rewards: #0: 11.014 +[2024-09-05 09:46:26,026][00556] Avg episode reward: 24.614, avg true_objective: 11.014 +[2024-09-05 09:46:26,202][00556] Num frames 5600... +[2024-09-05 09:46:26,355][00556] Num frames 5700... +[2024-09-05 09:46:26,488][00556] Num frames 5800... +[2024-09-05 09:46:26,591][00556] Avg episode rewards: #0: 21.565, true rewards: #0: 9.732 +[2024-09-05 09:46:26,593][00556] Avg episode reward: 21.565, avg true_objective: 9.732 +[2024-09-05 09:46:26,673][00556] Num frames 5900... +[2024-09-05 09:46:26,799][00556] Num frames 6000... +[2024-09-05 09:46:26,922][00556] Num frames 6100... +[2024-09-05 09:46:27,048][00556] Num frames 6200... +[2024-09-05 09:46:27,170][00556] Num frames 6300... +[2024-09-05 09:46:27,294][00556] Num frames 6400... +[2024-09-05 09:46:27,423][00556] Num frames 6500... +[2024-09-05 09:46:27,579][00556] Avg episode rewards: #0: 20.250, true rewards: #0: 9.393 +[2024-09-05 09:46:27,580][00556] Avg episode reward: 20.250, avg true_objective: 9.393 +[2024-09-05 09:46:27,613][00556] Num frames 6600... +[2024-09-05 09:46:27,741][00556] Num frames 6700... +[2024-09-05 09:46:27,863][00556] Num frames 6800... +[2024-09-05 09:46:27,986][00556] Num frames 6900... +[2024-09-05 09:46:28,109][00556] Num frames 7000... +[2024-09-05 09:46:28,233][00556] Num frames 7100... +[2024-09-05 09:46:28,359][00556] Num frames 7200... +[2024-09-05 09:46:28,489][00556] Num frames 7300... +[2024-09-05 09:46:28,617][00556] Num frames 7400... +[2024-09-05 09:46:28,748][00556] Num frames 7500... +[2024-09-05 09:46:28,871][00556] Num frames 7600... +[2024-09-05 09:46:28,992][00556] Num frames 7700... +[2024-09-05 09:46:29,113][00556] Num frames 7800... +[2024-09-05 09:46:29,241][00556] Num frames 7900... +[2024-09-05 09:46:29,320][00556] Avg episode rewards: #0: 21.399, true rewards: #0: 9.899 +[2024-09-05 09:46:29,322][00556] Avg episode reward: 21.399, avg true_objective: 9.899 +[2024-09-05 09:46:29,422][00556] Num frames 8000... +[2024-09-05 09:46:29,552][00556] Num frames 8100... +[2024-09-05 09:46:29,681][00556] Num frames 8200... +[2024-09-05 09:46:29,806][00556] Num frames 8300... +[2024-09-05 09:46:29,929][00556] Num frames 8400... +[2024-09-05 09:46:30,048][00556] Num frames 8500... +[2024-09-05 09:46:30,167][00556] Num frames 8600... +[2024-09-05 09:46:30,290][00556] Num frames 8700... +[2024-09-05 09:46:30,414][00556] Num frames 8800... +[2024-09-05 09:46:30,548][00556] Num frames 8900... +[2024-09-05 09:46:30,670][00556] Num frames 9000... +[2024-09-05 09:46:30,796][00556] Num frames 9100... 
+[2024-09-05 09:46:30,918][00556] Num frames 9200... +[2024-09-05 09:46:31,042][00556] Num frames 9300... +[2024-09-05 09:46:31,163][00556] Num frames 9400... +[2024-09-05 09:46:31,325][00556] Avg episode rewards: #0: 23.433, true rewards: #0: 10.544 +[2024-09-05 09:46:31,326][00556] Avg episode reward: 23.433, avg true_objective: 10.544 +[2024-09-05 09:46:31,341][00556] Num frames 9500... +[2024-09-05 09:46:31,460][00556] Num frames 9600... +[2024-09-05 09:46:31,590][00556] Num frames 9700... +[2024-09-05 09:46:31,721][00556] Num frames 9800... +[2024-09-05 09:46:31,845][00556] Num frames 9900... +[2024-09-05 09:46:31,967][00556] Num frames 10000... +[2024-09-05 09:46:32,094][00556] Num frames 10100... +[2024-09-05 09:46:32,217][00556] Num frames 10200... +[2024-09-05 09:46:32,346][00556] Num frames 10300... +[2024-09-05 09:46:32,471][00556] Avg episode rewards: #0: 23.154, true rewards: #0: 10.354 +[2024-09-05 09:46:32,472][00556] Avg episode reward: 23.154, avg true_objective: 10.354 +[2024-09-05 09:47:37,581][00556] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-05 09:47:38,072][00556] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-05 09:47:38,074][00556] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-05 09:47:38,076][00556] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-05 09:47:38,079][00556] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-05 09:47:38,080][00556] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-05 09:47:38,082][00556] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-05 09:47:38,084][00556] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-09-05 09:47:38,085][00556] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-05 09:47:38,086][00556] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-09-05 09:47:38,087][00556] Adding new argument 'hf_repository'='neeldevenshah/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-09-05 09:47:38,088][00556] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-05 09:47:38,090][00556] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-05 09:47:38,091][00556] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-05 09:47:38,092][00556] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-05 09:47:38,093][00556] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-05 09:47:38,133][00556] RunningMeanStd input shape: (3, 72, 128) +[2024-09-05 09:47:38,136][00556] RunningMeanStd input shape: (1,) +[2024-09-05 09:47:38,153][00556] ConvEncoder: input_channels=3 +[2024-09-05 09:47:38,209][00556] Conv encoder output size: 512 +[2024-09-05 09:47:38,211][00556] Policy head output size: 512 +[2024-09-05 09:47:38,237][00556] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-05 09:47:38,853][00556] Num frames 100... +[2024-09-05 09:47:39,014][00556] Num frames 200... +[2024-09-05 09:47:39,186][00556] Num frames 300... +[2024-09-05 09:47:39,344][00556] Num frames 400... 
+[2024-09-05 09:47:39,500][00556] Num frames 500... +[2024-09-05 09:47:39,663][00556] Num frames 600... +[2024-09-05 09:47:39,831][00556] Num frames 700... +[2024-09-05 09:47:39,993][00556] Num frames 800... +[2024-09-05 09:47:40,145][00556] Avg episode rewards: #0: 17.600, true rewards: #0: 8.600 +[2024-09-05 09:47:40,147][00556] Avg episode reward: 17.600, avg true_objective: 8.600 +[2024-09-05 09:47:40,217][00556] Num frames 900... +[2024-09-05 09:47:40,387][00556] Num frames 1000... +[2024-09-05 09:47:40,555][00556] Num frames 1100... +[2024-09-05 09:47:40,718][00556] Num frames 1200... +[2024-09-05 09:47:40,881][00556] Num frames 1300... +[2024-09-05 09:47:41,058][00556] Num frames 1400... +[2024-09-05 09:47:41,228][00556] Num frames 1500... +[2024-09-05 09:47:41,292][00556] Avg episode rewards: #0: 14.515, true rewards: #0: 7.515 +[2024-09-05 09:47:41,294][00556] Avg episode reward: 14.515, avg true_objective: 7.515 +[2024-09-05 09:47:41,466][00556] Num frames 1600... +[2024-09-05 09:47:41,654][00556] Num frames 1700... +[2024-09-05 09:47:41,842][00556] Num frames 1800... +[2024-09-05 09:47:42,030][00556] Num frames 1900... +[2024-09-05 09:47:42,229][00556] Num frames 2000... +[2024-09-05 09:47:42,427][00556] Num frames 2100... +[2024-09-05 09:47:42,508][00556] Avg episode rewards: #0: 13.363, true rewards: #0: 7.030 +[2024-09-05 09:47:42,510][00556] Avg episode reward: 13.363, avg true_objective: 7.030 +[2024-09-05 09:47:42,679][00556] Num frames 2200... +[2024-09-05 09:47:42,895][00556] Num frames 2300... +[2024-09-05 09:47:43,103][00556] Num frames 2400... +[2024-09-05 09:47:43,337][00556] Num frames 2500... +[2024-09-05 09:47:43,523][00556] Num frames 2600... +[2024-09-05 09:47:43,733][00556] Num frames 2700... +[2024-09-05 09:47:43,949][00556] Num frames 2800... +[2024-09-05 09:47:44,146][00556] Num frames 2900... +[2024-09-05 09:47:44,361][00556] Num frames 3000... +[2024-09-05 09:47:44,584][00556] Num frames 3100... +[2024-09-05 09:47:44,842][00556] Avg episode rewards: #0: 16.243, true rewards: #0: 7.992 +[2024-09-05 09:47:44,843][00556] Avg episode reward: 16.243, avg true_objective: 7.992 +[2024-09-05 09:47:44,850][00556] Num frames 3200... +[2024-09-05 09:47:45,060][00556] Num frames 3300... +[2024-09-05 09:47:45,257][00556] Num frames 3400... +[2024-09-05 09:47:45,448][00556] Num frames 3500... +[2024-09-05 09:47:45,642][00556] Num frames 3600... +[2024-09-05 09:47:45,842][00556] Num frames 3700... +[2024-09-05 09:47:46,067][00556] Num frames 3800... +[2024-09-05 09:47:46,292][00556] Num frames 3900... +[2024-09-05 09:47:46,431][00556] Num frames 4000... +[2024-09-05 09:47:46,524][00556] Avg episode rewards: #0: 16.658, true rewards: #0: 8.058 +[2024-09-05 09:47:46,525][00556] Avg episode reward: 16.658, avg true_objective: 8.058 +[2024-09-05 09:47:46,611][00556] Num frames 4100... +[2024-09-05 09:47:46,742][00556] Num frames 4200... +[2024-09-05 09:47:46,866][00556] Num frames 4300... +[2024-09-05 09:47:46,990][00556] Num frames 4400... +[2024-09-05 09:47:47,111][00556] Num frames 4500... +[2024-09-05 09:47:47,260][00556] Avg episode rewards: #0: 15.122, true rewards: #0: 7.622 +[2024-09-05 09:47:47,261][00556] Avg episode reward: 15.122, avg true_objective: 7.622 +[2024-09-05 09:47:47,296][00556] Num frames 4600... +[2024-09-05 09:47:47,419][00556] Num frames 4700... +[2024-09-05 09:47:47,551][00556] Num frames 4800... +[2024-09-05 09:47:47,678][00556] Num frames 4900... +[2024-09-05 09:47:47,807][00556] Num frames 5000... 
+[2024-09-05 09:47:47,929][00556] Num frames 5100... +[2024-09-05 09:47:48,054][00556] Num frames 5200... +[2024-09-05 09:47:48,188][00556] Num frames 5300... +[2024-09-05 09:47:48,312][00556] Num frames 5400... +[2024-09-05 09:47:48,440][00556] Num frames 5500... +[2024-09-05 09:47:48,572][00556] Num frames 5600... +[2024-09-05 09:47:48,705][00556] Num frames 5700... +[2024-09-05 09:47:48,830][00556] Num frames 5800... +[2024-09-05 09:47:48,951][00556] Num frames 5900... +[2024-09-05 09:47:49,071][00556] Num frames 6000... +[2024-09-05 09:47:49,192][00556] Num frames 6100... +[2024-09-05 09:47:49,315][00556] Num frames 6200... +[2024-09-05 09:47:49,441][00556] Num frames 6300... +[2024-09-05 09:47:49,523][00556] Avg episode rewards: #0: 18.887, true rewards: #0: 9.030 +[2024-09-05 09:47:49,525][00556] Avg episode reward: 18.887, avg true_objective: 9.030 +[2024-09-05 09:47:49,628][00556] Num frames 6400... +[2024-09-05 09:47:49,760][00556] Num frames 6500... +[2024-09-05 09:47:49,883][00556] Num frames 6600... +[2024-09-05 09:47:50,006][00556] Num frames 6700... +[2024-09-05 09:47:50,129][00556] Num frames 6800... +[2024-09-05 09:47:50,251][00556] Num frames 6900... +[2024-09-05 09:47:50,377][00556] Num frames 7000... +[2024-09-05 09:47:50,506][00556] Num frames 7100... +[2024-09-05 09:47:50,637][00556] Num frames 7200... +[2024-09-05 09:47:50,764][00556] Num frames 7300... +[2024-09-05 09:47:50,890][00556] Num frames 7400... +[2024-09-05 09:47:51,014][00556] Num frames 7500... +[2024-09-05 09:47:51,139][00556] Num frames 7600... +[2024-09-05 09:47:51,263][00556] Num frames 7700... +[2024-09-05 09:47:51,387][00556] Num frames 7800... +[2024-09-05 09:47:51,509][00556] Num frames 7900... +[2024-09-05 09:47:51,649][00556] Avg episode rewards: #0: 22.336, true rewards: #0: 9.961 +[2024-09-05 09:47:51,651][00556] Avg episode reward: 22.336, avg true_objective: 9.961 +[2024-09-05 09:47:51,706][00556] Num frames 8000... +[2024-09-05 09:47:51,836][00556] Num frames 8100... +[2024-09-05 09:47:51,961][00556] Num frames 8200... +[2024-09-05 09:47:52,087][00556] Num frames 8300... +[2024-09-05 09:47:52,211][00556] Num frames 8400... +[2024-09-05 09:47:52,341][00556] Num frames 8500... +[2024-09-05 09:47:52,465][00556] Num frames 8600... +[2024-09-05 09:47:52,587][00556] Num frames 8700... +[2024-09-05 09:47:52,712][00556] Num frames 8800... +[2024-09-05 09:47:52,848][00556] Num frames 8900... +[2024-09-05 09:47:52,971][00556] Num frames 9000... +[2024-09-05 09:47:53,090][00556] Num frames 9100... +[2024-09-05 09:47:53,211][00556] Num frames 9200... +[2024-09-05 09:47:53,333][00556] Num frames 9300... +[2024-09-05 09:47:53,485][00556] Avg episode rewards: #0: 23.643, true rewards: #0: 10.421 +[2024-09-05 09:47:53,486][00556] Avg episode reward: 23.643, avg true_objective: 10.421 +[2024-09-05 09:47:53,514][00556] Num frames 9400... +[2024-09-05 09:47:53,634][00556] Num frames 9500... +[2024-09-05 09:47:53,766][00556] Num frames 9600... +[2024-09-05 09:47:53,896][00556] Num frames 9700... +[2024-09-05 09:47:54,018][00556] Num frames 9800... +[2024-09-05 09:47:54,138][00556] Num frames 9900... +[2024-09-05 09:47:54,266][00556] Avg episode rewards: #0: 22.360, true rewards: #0: 9.960 +[2024-09-05 09:47:54,267][00556] Avg episode reward: 22.360, avg true_objective: 9.960 +[2024-09-05 09:48:57,514][00556] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
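
Note: the evaluation/upload run recorded above (num_workers overridden to 1; no_render, save_video, max_num_frames=100000, max_num_episodes=10, push_to_hub, hf_repository=neeldevenshah/rl_course_vizdoom_health_gathering_supreme) corresponds to invoking Sample Factory's enjoy entry point on the saved experiment config. A minimal sketch of that step, assuming the register_vizdoom_components / parse_vizdoom_cfg helpers from the Deep RL course notebook (their definitions are not shown in this log):

    from sample_factory.enjoy import enjoy  # Sample Factory 2.x evaluation entry point

    # Assumed notebook helpers (hypothetical names, not defined in this log):
    # register_vizdoom_components() registers the Doom envs/models with Sample Factory;
    # parse_vizdoom_cfg() builds a config from CLI-style args.
    register_vizdoom_components()
    cfg = parse_vizdoom_cfg(
        argv=[
            "--env=doom_health_gathering_supreme",
            "--num_workers=1",       # matches "Overriding arg 'num_workers' with value 1"
            "--no_render",
            "--save_video",
            "--max_num_frames=100000",
            "--max_num_episodes=10",
            "--push_to_hub",
            "--hf_repository=neeldevenshah/rl_course_vizdoom_health_gathering_supreme",
        ],
        evaluation=True,
    )
    status = enjoy(cfg)  # loads the latest checkpoint, rolls out 10 episodes,
                         # writes replay.mp4, and pushes the model to the Hub

With these arguments, enjoy() reproduces exactly the sequence logged above: it loads checkpoint_000001955_8007680.pth, runs 10 evaluation episodes, saves the replay video, and uploads the result to the hf_repository named in the config.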