diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1,210 +1,41 @@ -[2024-09-01 06:28:21,632][00307] Saving configuration to /content/train_dir/default_experiment/config.json... -[2024-09-01 06:28:21,635][00307] Rollout worker 0 uses device cpu -[2024-09-01 06:28:21,638][00307] Rollout worker 1 uses device cpu -[2024-09-01 06:28:21,639][00307] Rollout worker 2 uses device cpu -[2024-09-01 06:28:21,641][00307] Rollout worker 3 uses device cpu -[2024-09-01 06:28:21,643][00307] Rollout worker 4 uses device cpu -[2024-09-01 06:28:21,645][00307] Rollout worker 5 uses device cpu -[2024-09-01 06:28:21,646][00307] Rollout worker 6 uses device cpu -[2024-09-01 06:28:21,648][00307] Rollout worker 7 uses device cpu -[2024-09-01 06:32:11,949][00307] Environment doom_basic already registered, overwriting... -[2024-09-01 06:32:11,952][00307] Environment doom_two_colors_easy already registered, overwriting... -[2024-09-01 06:32:11,955][00307] Environment doom_two_colors_hard already registered, overwriting... -[2024-09-01 06:32:11,956][00307] Environment doom_dm already registered, overwriting... -[2024-09-01 06:32:11,958][00307] Environment doom_dwango5 already registered, overwriting... -[2024-09-01 06:32:11,961][00307] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-09-01 06:32:11,962][00307] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-09-01 06:32:11,964][00307] Environment doom_my_way_home already registered, overwriting... -[2024-09-01 06:32:11,967][00307] Environment doom_deadly_corridor already registered, overwriting... -[2024-09-01 06:32:11,968][00307] Environment doom_defend_the_center already registered, overwriting... -[2024-09-01 06:32:11,969][00307] Environment doom_defend_the_line already registered, overwriting... -[2024-09-01 06:32:11,971][00307] Environment doom_health_gathering already registered, overwriting... -[2024-09-01 06:32:11,973][00307] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-09-01 06:32:11,974][00307] Environment doom_battle already registered, overwriting... -[2024-09-01 06:32:11,976][00307] Environment doom_battle2 already registered, overwriting... -[2024-09-01 06:32:11,977][00307] Environment doom_duel_bots already registered, overwriting... -[2024-09-01 06:32:11,979][00307] Environment doom_deathmatch_bots already registered, overwriting... -[2024-09-01 06:32:11,981][00307] Environment doom_duel already registered, overwriting... -[2024-09-01 06:32:11,982][00307] Environment doom_deathmatch_full already registered, overwriting... -[2024-09-01 06:32:11,984][00307] Environment doom_benchmark already registered, overwriting... -[2024-09-01 06:32:11,985][00307] register_encoder_factory: -[2024-09-01 06:32:12,019][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 06:32:12,021][00307] Overriding arg 'device' with value 'cpu' passed from command line -[2024-09-01 06:32:12,035][00307] Experiment dir /content/train_dir/default_experiment already exists! -[2024-09-01 06:32:12,039][00307] Resuming existing experiment from /content/train_dir/default_experiment... -[2024-09-01 06:32:12,040][00307] Weights and Biases integration disabled -[2024-09-01 06:32:12,046][00307] Environment var CUDA_VISIBLE_DEVICES is - -[2024-09-01 06:32:14,625][00307] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/content/train_dir -restart_behavior=resume -device=cpu -seed=None -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=8 -num_envs_per_worker=4 -batch_size=1024 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=4000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-09-01 06:32:14,627][00307] Saving configuration to /content/train_dir/default_experiment/config.json... -[2024-09-01 06:32:14,636][00307] Rollout worker 0 uses device cpu -[2024-09-01 06:32:14,638][00307] Rollout worker 1 uses device cpu -[2024-09-01 06:32:14,639][00307] Rollout worker 2 uses device cpu -[2024-09-01 06:32:14,645][00307] Rollout worker 3 uses device cpu -[2024-09-01 06:32:14,646][00307] Rollout worker 4 uses device cpu -[2024-09-01 06:32:14,648][00307] Rollout worker 5 uses device cpu -[2024-09-01 06:32:14,650][00307] Rollout worker 6 uses device cpu -[2024-09-01 06:32:14,652][00307] Rollout worker 7 uses device cpu -[2024-09-01 06:32:14,816][00307] InferenceWorker_p0-w0: min num requests: 2 -[2024-09-01 06:32:14,859][00307] Starting all processes... -[2024-09-01 06:32:14,860][00307] Starting process learner_proc0 -[2024-09-01 06:32:14,918][00307] Starting all processes... -[2024-09-01 06:32:14,930][00307] Starting process inference_proc0-0 -[2024-09-01 06:32:14,931][00307] Starting process rollout_proc0 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc1 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc2 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc3 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc4 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc5 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc6 -[2024-09-01 06:32:14,932][00307] Starting process rollout_proc7 -[2024-09-01 06:32:30,958][04801] Starting seed is not provided -[2024-09-01 06:32:30,958][04801] Initializing actor-critic model on device cpu -[2024-09-01 06:32:30,959][04801] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 06:32:30,970][04801] RunningMeanStd input shape: (1,) -[2024-09-01 06:32:31,128][04801] ConvEncoder: input_channels=3 -[2024-09-01 06:32:31,184][04820] Worker 6 uses CPU cores [0] -[2024-09-01 06:32:31,432][04817] Worker 1 uses CPU cores [1] -[2024-09-01 06:32:31,551][04816] Worker 2 uses CPU cores [0] -[2024-09-01 06:32:31,605][04818] Worker 3 uses CPU cores [1] -[2024-09-01 06:32:31,642][04819] Worker 4 uses CPU cores [0] -[2024-09-01 06:32:31,652][04821] Worker 5 uses CPU cores [1] -[2024-09-01 06:32:31,673][04822] Worker 7 uses CPU cores [1] -[2024-09-01 06:32:31,699][04815] Worker 0 uses CPU cores [0] -[2024-09-01 06:32:31,803][04801] Conv encoder output size: 512 -[2024-09-01 06:32:31,803][04801] Policy head output size: 512 -[2024-09-01 06:32:31,829][04801] Created Actor Critic model with architecture: -[2024-09-01 06:32:31,829][04801] ActorCriticSharedWeights( +[2024-09-01 14:50:18,637][00194] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-09-01 14:50:18,643][00194] Rollout worker 0 uses device cpu +[2024-09-01 14:50:18,645][00194] Rollout worker 1 uses device cpu +[2024-09-01 14:50:18,646][00194] Rollout worker 2 uses device cpu +[2024-09-01 14:50:18,648][00194] Rollout worker 3 uses device cpu +[2024-09-01 14:50:18,649][00194] Rollout worker 4 uses device cpu +[2024-09-01 14:50:18,651][00194] Rollout worker 5 uses device cpu +[2024-09-01 14:50:18,653][00194] Rollout worker 6 uses device cpu +[2024-09-01 14:50:18,654][00194] Rollout worker 7 uses device cpu +[2024-09-01 14:50:18,826][00194] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-01 14:50:18,874][00194] Starting all processes... +[2024-09-01 14:50:18,879][00194] Starting process learner_proc0 +[2024-09-01 14:50:18,932][00194] Starting all processes... +[2024-09-01 14:50:18,945][00194] Starting process inference_proc0-0 +[2024-09-01 14:50:18,946][00194] Starting process rollout_proc0 +[2024-09-01 14:50:18,946][00194] Starting process rollout_proc1 +[2024-09-01 14:50:18,946][00194] Starting process rollout_proc2 +[2024-09-01 14:50:18,946][00194] Starting process rollout_proc3 +[2024-09-01 14:50:18,946][00194] Starting process rollout_proc4 +[2024-09-01 14:50:18,946][00194] Starting process rollout_proc5 +[2024-09-01 14:50:18,947][00194] Starting process rollout_proc6 +[2024-09-01 14:50:18,947][00194] Starting process rollout_proc7 +[2024-09-01 14:50:32,730][03021] Starting seed is not provided +[2024-09-01 14:50:32,732][03021] Initializing actor-critic model on device cpu +[2024-09-01 14:50:32,733][03021] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 14:50:32,735][03021] RunningMeanStd input shape: (1,) +[2024-09-01 14:50:32,820][03021] ConvEncoder: input_channels=3 +[2024-09-01 14:50:33,363][03035] Worker 0 uses CPU cores [0] +[2024-09-01 14:50:33,505][03042] Worker 7 uses CPU cores [1] +[2024-09-01 14:50:33,519][03038] Worker 3 uses CPU cores [1] +[2024-09-01 14:50:33,572][03041] Worker 6 uses CPU cores [0] +[2024-09-01 14:50:33,653][03039] Worker 4 uses CPU cores [0] +[2024-09-01 14:50:33,669][03037] Worker 2 uses CPU cores [0] +[2024-09-01 14:50:33,694][03021] Conv encoder output size: 512 +[2024-09-01 14:50:33,696][03021] Policy head output size: 512 +[2024-09-01 14:50:33,724][03021] Created Actor Critic model with architecture: +[2024-09-01 14:50:33,728][03036] Worker 1 uses CPU cores [1] +[2024-09-01 14:50:33,726][03021] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -245,1333 +76,2335 @@ git_repo_name=not a git repository (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-09-01 06:32:32,452][04801] Using optimizer -[2024-09-01 06:32:32,454][04801] No checkpoints found -[2024-09-01 06:32:32,454][04801] Did not load from checkpoint, starting from scratch! -[2024-09-01 06:32:32,455][04801] Initialized policy 0 weights for model version 0 -[2024-09-01 06:32:32,458][04801] LearnerWorker_p0 finished initialization! -[2024-09-01 06:32:32,467][04814] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 06:32:32,470][04814] RunningMeanStd input shape: (1,) -[2024-09-01 06:32:32,495][04814] ConvEncoder: input_channels=3 -[2024-09-01 06:32:32,705][04814] Conv encoder output size: 512 -[2024-09-01 06:32:32,706][04814] Policy head output size: 512 -[2024-09-01 06:32:32,736][00307] Inference worker 0-0 is ready! -[2024-09-01 06:32:32,738][00307] All inference workers are ready! Signal rollout workers to start! -[2024-09-01 06:32:32,884][04815] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,888][04820] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,882][04816] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,889][04819] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,892][04817] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,897][04821] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,899][04822] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:32,908][04818] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 06:32:34,051][04818] Decorrelating experience for 0 frames... -[2024-09-01 06:32:34,050][04815] Decorrelating experience for 0 frames... -[2024-09-01 06:32:34,108][04822] Decorrelating experience for 0 frames... -[2024-09-01 06:32:34,540][04815] Decorrelating experience for 32 frames... -[2024-09-01 06:32:34,811][00307] Heartbeat connected on Batcher_0 -[2024-09-01 06:32:34,814][00307] Heartbeat connected on LearnerWorker_p0 -[2024-09-01 06:32:34,847][00307] Heartbeat connected on InferenceWorker_p0-w0 -[2024-09-01 06:32:34,993][04818] Decorrelating experience for 32 frames... -[2024-09-01 06:32:35,096][04822] Decorrelating experience for 32 frames... -[2024-09-01 06:32:35,237][04815] Decorrelating experience for 64 frames... -[2024-09-01 06:32:36,072][04818] Decorrelating experience for 64 frames... -[2024-09-01 06:32:36,283][04822] Decorrelating experience for 64 frames... -[2024-09-01 06:32:36,391][04819] Decorrelating experience for 0 frames... -[2024-09-01 06:32:36,506][04816] Decorrelating experience for 0 frames... -[2024-09-01 06:32:36,673][04815] Decorrelating experience for 96 frames... -[2024-09-01 06:32:36,888][00307] Heartbeat connected on RolloutWorker_w0 -[2024-09-01 06:32:37,046][00307] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 06:32:37,241][04818] Decorrelating experience for 96 frames... -[2024-09-01 06:32:37,431][00307] Heartbeat connected on RolloutWorker_w3 -[2024-09-01 06:32:37,467][04822] Decorrelating experience for 96 frames... -[2024-09-01 06:32:37,622][00307] Heartbeat connected on RolloutWorker_w7 -[2024-09-01 06:32:38,118][04819] Decorrelating experience for 32 frames... -[2024-09-01 06:32:38,690][04816] Decorrelating experience for 32 frames... -[2024-09-01 06:32:42,049][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 220.7. Samples: 1104. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 06:32:42,053][00307] Avg episode reward: [(0, '2.930')] -[2024-09-01 06:32:42,324][04819] Decorrelating experience for 64 frames... -[2024-09-01 06:32:43,242][04816] Decorrelating experience for 64 frames... -[2024-09-01 06:32:47,046][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 153.6. Samples: 1536. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 06:32:47,055][00307] Avg episode reward: [(0, '3.385')] -[2024-09-01 06:32:47,961][04819] Decorrelating experience for 96 frames... -[2024-09-01 06:32:48,680][00307] Heartbeat connected on RolloutWorker_w4 -[2024-09-01 06:32:48,727][04801] Signal inference workers to stop experience collection... -[2024-09-01 06:32:48,758][04814] InferenceWorker_p0-w0: stopping experience collection -[2024-09-01 06:32:48,805][04816] Decorrelating experience for 96 frames... -[2024-09-01 06:32:49,063][00307] Heartbeat connected on RolloutWorker_w2 -[2024-09-01 06:32:49,189][04801] Signal inference workers to resume experience collection... -[2024-09-01 06:32:49,191][04814] InferenceWorker_p0-w0: resuming experience collection -[2024-09-01 06:32:52,046][00307] Fps is (10 sec: 409.7, 60 sec: 273.1, 300 sec: 273.1). Total num frames: 4096. Throughput: 0: 168.7. Samples: 2530. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-09-01 06:32:52,049][00307] Avg episode reward: [(0, '3.410')] -[2024-09-01 06:32:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 8192. Throughput: 0: 210.1. Samples: 4202. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-09-01 06:32:57,048][00307] Avg episode reward: [(0, '3.576')] -[2024-09-01 06:33:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 491.5, 300 sec: 491.5). Total num frames: 12288. Throughput: 0: 182.2. Samples: 4556. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:33:02,053][00307] Avg episode reward: [(0, '3.645')] -[2024-09-01 06:33:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 16384. Throughput: 0: 198.5. Samples: 5956. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:33:07,049][00307] Avg episode reward: [(0, '3.833')] -[2024-09-01 06:33:12,046][00307] Fps is (10 sec: 1228.8, 60 sec: 702.2, 300 sec: 702.2). Total num frames: 24576. Throughput: 0: 214.1. Samples: 7492. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:33:12,060][00307] Avg episode reward: [(0, '3.930')] -[2024-09-01 06:33:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 614.4, 300 sec: 614.4). Total num frames: 24576. Throughput: 0: 198.3. Samples: 7932. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:33:17,049][00307] Avg episode reward: [(0, '4.062')] -[2024-09-01 06:33:22,047][00307] Fps is (10 sec: 409.6, 60 sec: 637.2, 300 sec: 637.2). Total num frames: 28672. Throughput: 0: 205.3. Samples: 9238. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:33:22,050][00307] Avg episode reward: [(0, '4.058')] -[2024-09-01 06:33:27,046][00307] Fps is (10 sec: 1228.8, 60 sec: 737.3, 300 sec: 737.3). Total num frames: 36864. Throughput: 0: 213.1. Samples: 10694. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:33:27,049][00307] Avg episode reward: [(0, '4.098')] -[2024-09-01 06:33:30,210][04814] Updated weights for policy 0, policy_version 10 (0.1260) -[2024-09-01 06:33:32,046][00307] Fps is (10 sec: 1228.8, 60 sec: 744.7, 300 sec: 744.7). Total num frames: 40960. Throughput: 0: 220.6. Samples: 11464. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:33:32,054][00307] Avg episode reward: [(0, '4.198')] -[2024-09-01 06:33:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 750.9). Total num frames: 45056. Throughput: 0: 222.0. Samples: 12518. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:33:37,054][00307] Avg episode reward: [(0, '4.202')] -[2024-09-01 06:33:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 756.2). Total num frames: 49152. Throughput: 0: 216.3. Samples: 13936. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:33:42,054][00307] Avg episode reward: [(0, '4.294')] -[2024-09-01 06:33:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 760.7). Total num frames: 53248. Throughput: 0: 223.5. Samples: 14614. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:33:47,051][00307] Avg episode reward: [(0, '4.265')] -[2024-09-01 06:33:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 764.6). Total num frames: 57344. Throughput: 0: 220.5. Samples: 15878. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:33:52,058][00307] Avg episode reward: [(0, '4.265')] -[2024-09-01 06:33:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 768.0). Total num frames: 61440. Throughput: 0: 216.0. Samples: 17212. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:33:57,051][00307] Avg episode reward: [(0, '4.275')] -[2024-09-01 06:34:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 771.0). Total num frames: 65536. Throughput: 0: 222.7. Samples: 17954. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:02,051][00307] Avg episode reward: [(0, '4.296')] -[2024-09-01 06:34:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 773.7). Total num frames: 69632. Throughput: 0: 224.9. Samples: 19358. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:34:07,048][00307] Avg episode reward: [(0, '4.329')] -[2024-09-01 06:34:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 776.1). Total num frames: 73728. Throughput: 0: 221.7. Samples: 20672. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:34:12,054][00307] Avg episode reward: [(0, '4.347')] -[2024-09-01 06:34:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 778.2). Total num frames: 77824. Throughput: 0: 218.9. Samples: 21316. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:34:17,048][00307] Avg episode reward: [(0, '4.376')] -[2024-09-01 06:34:17,118][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000020_81920.pth... -[2024-09-01 06:34:17,123][04814] Updated weights for policy 0, policy_version 20 (0.1022) -[2024-09-01 06:34:22,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 86016. Throughput: 0: 227.2. Samples: 22744. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:22,050][00307] Avg episode reward: [(0, '4.497')] -[2024-09-01 06:34:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 782.0). Total num frames: 86016. Throughput: 0: 220.0. Samples: 23836. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:27,054][00307] Avg episode reward: [(0, '4.517')] -[2024-09-01 06:34:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 94208. Throughput: 0: 221.6. Samples: 24588. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:32,050][00307] Avg episode reward: [(0, '4.592')] -[2024-09-01 06:34:37,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 98304. Throughput: 0: 225.8. Samples: 26040. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:37,050][00307] Avg episode reward: [(0, '4.547')] -[2024-09-01 06:34:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 102400. Throughput: 0: 220.1. Samples: 27116. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:34:42,056][00307] Avg episode reward: [(0, '4.481')] -[2024-09-01 06:34:45,297][04801] Saving new best policy, reward=4.481! -[2024-09-01 06:34:47,048][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 106496. Throughput: 0: 222.2. Samples: 27954. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:34:47,056][00307] Avg episode reward: [(0, '4.419')] -[2024-09-01 06:34:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 110592. Throughput: 0: 225.6. Samples: 29512. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:52,054][00307] Avg episode reward: [(0, '4.413')] -[2024-09-01 06:34:57,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 114688. Throughput: 0: 221.4. Samples: 30636. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:34:57,058][00307] Avg episode reward: [(0, '4.383')] -[2024-09-01 06:35:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 118784. Throughput: 0: 217.9. Samples: 31120. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:35:02,052][00307] Avg episode reward: [(0, '4.296')] -[2024-09-01 06:35:03,411][04814] Updated weights for policy 0, policy_version 30 (0.1001) -[2024-09-01 06:35:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 122880. Throughput: 0: 224.2. Samples: 32834. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:35:07,056][00307] Avg episode reward: [(0, '4.225')] -[2024-09-01 06:35:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 126976. Throughput: 0: 226.1. Samples: 34010. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:35:12,056][00307] Avg episode reward: [(0, '4.193')] -[2024-09-01 06:35:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 131072. Throughput: 0: 220.5. Samples: 34512. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:35:17,054][00307] Avg episode reward: [(0, '4.151')] -[2024-09-01 06:35:22,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 844.0). Total num frames: 139264. Throughput: 0: 222.5. Samples: 36052. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:35:22,052][00307] Avg episode reward: [(0, '4.206')] -[2024-09-01 06:35:27,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 843.3). Total num frames: 143360. Throughput: 0: 231.2. Samples: 37518. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:35:27,052][00307] Avg episode reward: [(0, '4.157')] -[2024-09-01 06:35:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 842.6). Total num frames: 147456. Throughput: 0: 224.9. Samples: 38076. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:35:32,061][00307] Avg episode reward: [(0, '4.177')] -[2024-09-01 06:35:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 842.0). Total num frames: 151552. Throughput: 0: 217.6. Samples: 39304. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:35:37,049][00307] Avg episode reward: [(0, '4.312')] -[2024-09-01 06:35:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 841.3). Total num frames: 155648. Throughput: 0: 225.2. Samples: 40768. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:35:42,049][00307] Avg episode reward: [(0, '4.367')] -[2024-09-01 06:35:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 840.8). Total num frames: 159744. Throughput: 0: 228.6. Samples: 41406. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:35:47,053][00307] Avg episode reward: [(0, '4.360')] -[2024-09-01 06:35:49,882][04814] Updated weights for policy 0, policy_version 40 (0.0538) -[2024-09-01 06:35:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 840.2). Total num frames: 163840. Throughput: 0: 217.3. Samples: 42614. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:35:52,052][00307] Avg episode reward: [(0, '4.431')] -[2024-09-01 06:35:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 839.7). Total num frames: 167936. Throughput: 0: 229.9. Samples: 44354. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:35:57,055][00307] Avg episode reward: [(0, '4.412')] -[2024-09-01 06:36:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 839.2). Total num frames: 172032. Throughput: 0: 235.3. Samples: 45102. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:02,049][00307] Avg episode reward: [(0, '4.415')] -[2024-09-01 06:36:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 838.7). Total num frames: 176128. Throughput: 0: 216.0. Samples: 45774. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:07,049][00307] Avg episode reward: [(0, '4.500')] -[2024-09-01 06:36:07,837][04801] Saving new best policy, reward=4.500! -[2024-09-01 06:36:12,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 857.3). Total num frames: 184320. Throughput: 0: 219.6. Samples: 47400. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:36:12,054][00307] Avg episode reward: [(0, '4.486')] -[2024-09-01 06:36:16,073][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000046_188416.pth... -[2024-09-01 06:36:17,047][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 856.4). Total num frames: 188416. Throughput: 0: 225.0. Samples: 48202. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:36:17,052][00307] Avg episode reward: [(0, '4.546')] -[2024-09-01 06:36:22,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 837.4). Total num frames: 188416. Throughput: 0: 224.3. Samples: 49396. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:36:22,053][00307] Avg episode reward: [(0, '4.441')] -[2024-09-01 06:36:22,096][04801] Saving new best policy, reward=4.546! -[2024-09-01 06:36:27,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 854.8). Total num frames: 196608. Throughput: 0: 217.7. Samples: 50564. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:27,049][00307] Avg episode reward: [(0, '4.392')] -[2024-09-01 06:36:32,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 854.1). Total num frames: 200704. Throughput: 0: 223.5. Samples: 51464. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:32,049][00307] Avg episode reward: [(0, '4.385')] -[2024-09-01 06:36:35,001][04814] Updated weights for policy 0, policy_version 50 (0.1674) -[2024-09-01 06:36:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 853.3). Total num frames: 204800. Throughput: 0: 223.5. Samples: 52672. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:37,049][00307] Avg episode reward: [(0, '4.349')] -[2024-09-01 06:36:39,771][04801] Signal inference workers to stop experience collection... (50 times) -[2024-09-01 06:36:39,799][04814] InferenceWorker_p0-w0: stopping experience collection (50 times) -[2024-09-01 06:36:40,363][04801] Signal inference workers to resume experience collection... (50 times) -[2024-09-01 06:36:40,365][04814] InferenceWorker_p0-w0: resuming experience collection (50 times) -[2024-09-01 06:36:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 852.6). Total num frames: 208896. Throughput: 0: 215.9. Samples: 54068. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:42,049][00307] Avg episode reward: [(0, '4.409')] -[2024-09-01 06:36:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 852.0). Total num frames: 212992. Throughput: 0: 210.5. Samples: 54574. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:47,049][00307] Avg episode reward: [(0, '4.504')] -[2024-09-01 06:36:52,049][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 851.3). Total num frames: 217088. Throughput: 0: 231.1. Samples: 56176. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:52,054][00307] Avg episode reward: [(0, '4.488')] -[2024-09-01 06:36:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 850.7). Total num frames: 221184. Throughput: 0: 222.2. Samples: 57400. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:36:57,052][00307] Avg episode reward: [(0, '4.590')] -[2024-09-01 06:36:58,239][04801] Saving new best policy, reward=4.590! -[2024-09-01 06:37:02,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 850.1). Total num frames: 225280. Throughput: 0: 220.8. Samples: 58138. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:02,058][00307] Avg episode reward: [(0, '4.570')] -[2024-09-01 06:37:07,048][00307] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 864.7). Total num frames: 233472. Throughput: 0: 227.6. Samples: 59638. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:07,051][00307] Avg episode reward: [(0, '4.629')] -[2024-09-01 06:37:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 849.0). Total num frames: 233472. Throughput: 0: 223.4. Samples: 60618. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:12,051][00307] Avg episode reward: [(0, '4.599')] -[2024-09-01 06:37:12,466][04801] Saving new best policy, reward=4.629! -[2024-09-01 06:37:17,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 863.1). Total num frames: 241664. Throughput: 0: 223.1. Samples: 61504. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:37:17,049][00307] Avg episode reward: [(0, '4.586')] -[2024-09-01 06:37:20,281][04814] Updated weights for policy 0, policy_version 60 (0.1080) -[2024-09-01 06:37:22,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 862.3). Total num frames: 245760. Throughput: 0: 228.2. Samples: 62940. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:37:22,055][00307] Avg episode reward: [(0, '4.757')] -[2024-09-01 06:37:25,355][04801] Saving new best policy, reward=4.757! -[2024-09-01 06:37:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 861.6). Total num frames: 249856. Throughput: 0: 221.5. Samples: 64034. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:27,050][00307] Avg episode reward: [(0, '4.803')] -[2024-09-01 06:37:30,324][04801] Saving new best policy, reward=4.803! -[2024-09-01 06:37:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 253952. Throughput: 0: 227.7. Samples: 64822. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:32,049][00307] Avg episode reward: [(0, '4.721')] -[2024-09-01 06:37:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 258048. Throughput: 0: 227.5. Samples: 66412. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:37,049][00307] Avg episode reward: [(0, '4.723')] -[2024-09-01 06:37:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 262144. Throughput: 0: 226.1. Samples: 67576. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:42,053][00307] Avg episode reward: [(0, '4.727')] -[2024-09-01 06:37:47,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 266240. Throughput: 0: 219.9. Samples: 68032. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:37:47,050][00307] Avg episode reward: [(0, '4.677')] -[2024-09-01 06:37:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 270336. Throughput: 0: 224.1. Samples: 69720. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:37:52,048][00307] Avg episode reward: [(0, '4.616')] -[2024-09-01 06:37:57,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 278528. Throughput: 0: 230.4. Samples: 70986. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:37:57,053][00307] Avg episode reward: [(0, '4.600')] -[2024-09-01 06:38:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 278528. Throughput: 0: 223.9. Samples: 71578. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:02,056][00307] Avg episode reward: [(0, '4.721')] -[2024-09-01 06:38:06,090][04814] Updated weights for policy 0, policy_version 70 (0.0047) -[2024-09-01 06:38:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 286720. Throughput: 0: 222.8. Samples: 72966. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:07,050][00307] Avg episode reward: [(0, '4.574')] -[2024-09-01 06:38:12,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 290816. Throughput: 0: 235.1. Samples: 74614. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:12,049][00307] Avg episode reward: [(0, '4.490')] -[2024-09-01 06:38:16,895][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000072_294912.pth... -[2024-09-01 06:38:17,047][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 294912. Throughput: 0: 227.2. Samples: 75048. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:17,077][00307] Avg episode reward: [(0, '4.539')] -[2024-09-01 06:38:17,319][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000020_81920.pth -[2024-09-01 06:38:22,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 294912. Throughput: 0: 199.6. Samples: 75394. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:22,056][00307] Avg episode reward: [(0, '4.563')] -[2024-09-01 06:38:27,047][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 299008. Throughput: 0: 202.2. Samples: 76676. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:27,051][00307] Avg episode reward: [(0, '4.573')] -[2024-09-01 06:38:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 303104. Throughput: 0: 206.6. Samples: 77330. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:32,049][00307] Avg episode reward: [(0, '4.518')] -[2024-09-01 06:38:37,049][00307] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 307200. Throughput: 0: 197.9. Samples: 78626. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:37,055][00307] Avg episode reward: [(0, '4.552')] -[2024-09-01 06:38:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 311296. Throughput: 0: 202.5. Samples: 80100. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:42,048][00307] Avg episode reward: [(0, '4.670')] -[2024-09-01 06:38:47,047][00307] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 315392. Throughput: 0: 200.0. Samples: 80580. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:47,054][00307] Avg episode reward: [(0, '4.684')] -[2024-09-01 06:38:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 319488. Throughput: 0: 203.4. Samples: 82118. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:38:52,051][00307] Avg episode reward: [(0, '4.668')] -[2024-09-01 06:38:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 874.7). Total num frames: 323584. Throughput: 0: 193.1. Samples: 83304. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:38:57,054][00307] Avg episode reward: [(0, '4.670')] -[2024-09-01 06:38:57,426][04814] Updated weights for policy 0, policy_version 80 (0.1538) -[2024-09-01 06:39:02,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 331776. Throughput: 0: 201.2. Samples: 84102. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:39:02,049][00307] Avg episode reward: [(0, '4.742')] -[2024-09-01 06:39:07,049][00307] Fps is (10 sec: 1228.5, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 335872. Throughput: 0: 220.8. Samples: 85332. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:39:07,057][00307] Avg episode reward: [(0, '4.626')] -[2024-09-01 06:39:12,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 339968. Throughput: 0: 211.8. Samples: 86208. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:39:12,049][00307] Avg episode reward: [(0, '4.626')] -[2024-09-01 06:39:17,046][00307] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 344064. Throughput: 0: 219.0. Samples: 87184. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:39:17,051][00307] Avg episode reward: [(0, '4.579')] -[2024-09-01 06:39:22,052][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 348160. Throughput: 0: 216.3. Samples: 88358. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:39:22,055][00307] Avg episode reward: [(0, '4.557')] -[2024-09-01 06:39:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 352256. Throughput: 0: 210.8. Samples: 89588. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:39:27,056][00307] Avg episode reward: [(0, '4.511')] -[2024-09-01 06:39:32,046][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 356352. Throughput: 0: 217.8. Samples: 90382. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:39:32,055][00307] Avg episode reward: [(0, '4.439')] -[2024-09-01 06:39:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 360448. Throughput: 0: 215.8. Samples: 91830. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:39:37,049][00307] Avg episode reward: [(0, '4.391')] -[2024-09-01 06:39:42,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 364544. Throughput: 0: 215.6. Samples: 93008. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:39:42,050][00307] Avg episode reward: [(0, '4.343')] -[2024-09-01 06:39:45,083][04814] Updated weights for policy 0, policy_version 90 (0.1511) -[2024-09-01 06:39:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 368640. Throughput: 0: 208.0. Samples: 93460. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:39:47,049][00307] Avg episode reward: [(0, '4.281')] -[2024-09-01 06:39:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 372736. Throughput: 0: 215.4. Samples: 95026. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:39:52,052][00307] Avg episode reward: [(0, '4.307')] -[2024-09-01 06:39:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 376832. Throughput: 0: 227.5. Samples: 96446. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:39:57,049][00307] Avg episode reward: [(0, '4.295')] -[2024-09-01 06:40:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 380928. Throughput: 0: 214.4. Samples: 96834. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:02,049][00307] Avg episode reward: [(0, '4.377')] -[2024-09-01 06:40:07,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 385024. Throughput: 0: 221.7. Samples: 98332. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:07,054][00307] Avg episode reward: [(0, '4.394')] -[2024-09-01 06:40:12,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 393216. Throughput: 0: 210.4. Samples: 99056. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:40:12,052][00307] Avg episode reward: [(0, '4.365')] -[2024-09-01 06:40:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 393216. Throughput: 0: 216.7. Samples: 100134. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:40:17,052][00307] Avg episode reward: [(0, '4.329')] -[2024-09-01 06:40:17,859][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000097_397312.pth... -[2024-09-01 06:40:18,008][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000046_188416.pth -[2024-09-01 06:40:22,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 401408. Throughput: 0: 216.2. Samples: 101560. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:22,049][00307] Avg episode reward: [(0, '4.398')] -[2024-09-01 06:40:27,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 405504. Throughput: 0: 223.1. Samples: 103046. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:27,049][00307] Avg episode reward: [(0, '4.355')] -[2024-09-01 06:40:30,666][04814] Updated weights for policy 0, policy_version 100 (0.1213) -[2024-09-01 06:40:32,051][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 409600. Throughput: 0: 226.7. Samples: 103664. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:32,054][00307] Avg episode reward: [(0, '4.384')] -[2024-09-01 06:40:35,692][04801] Signal inference workers to stop experience collection... (100 times) -[2024-09-01 06:40:35,724][04814] InferenceWorker_p0-w0: stopping experience collection (100 times) -[2024-09-01 06:40:36,196][04801] Signal inference workers to resume experience collection... (100 times) -[2024-09-01 06:40:36,198][04814] InferenceWorker_p0-w0: resuming experience collection (100 times) -[2024-09-01 06:40:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 413696. Throughput: 0: 216.8. Samples: 104780. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:37,049][00307] Avg episode reward: [(0, '4.347')] -[2024-09-01 06:40:42,046][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 417792. Throughput: 0: 218.8. Samples: 106294. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:42,054][00307] Avg episode reward: [(0, '4.408')] -[2024-09-01 06:40:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 421888. Throughput: 0: 220.2. Samples: 106742. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:47,049][00307] Avg episode reward: [(0, '4.377')] -[2024-09-01 06:40:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 425984. Throughput: 0: 212.6. Samples: 107898. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:40:52,050][00307] Avg episode reward: [(0, '4.373')] -[2024-09-01 06:40:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 430080. Throughput: 0: 232.1. Samples: 109502. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-09-01 06:40:57,053][00307] Avg episode reward: [(0, '4.432')] -[2024-09-01 06:41:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 434176. Throughput: 0: 224.2. Samples: 110224. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:02,054][00307] Avg episode reward: [(0, '4.479')] -[2024-09-01 06:41:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 438272. Throughput: 0: 216.9. Samples: 111320. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:07,055][00307] Avg episode reward: [(0, '4.489')] -[2024-09-01 06:41:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 442368. Throughput: 0: 218.2. Samples: 112864. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:12,049][00307] Avg episode reward: [(0, '4.561')] -[2024-09-01 06:41:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 446464. Throughput: 0: 219.0. Samples: 113516. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:41:17,050][00307] Avg episode reward: [(0, '4.600')] -[2024-09-01 06:41:17,362][04814] Updated weights for policy 0, policy_version 110 (0.0603) -[2024-09-01 06:41:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 450560. Throughput: 0: 223.3. Samples: 114830. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:41:22,053][00307] Avg episode reward: [(0, '4.725')] -[2024-09-01 06:41:27,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 454656. Throughput: 0: 216.5. Samples: 116038. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:41:27,055][00307] Avg episode reward: [(0, '4.653')] -[2024-09-01 06:41:32,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 462848. Throughput: 0: 224.6. Samples: 116850. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:32,054][00307] Avg episode reward: [(0, '4.709')] -[2024-09-01 06:41:37,049][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 466944. Throughput: 0: 225.1. Samples: 118030. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:37,051][00307] Avg episode reward: [(0, '4.812')] -[2024-09-01 06:41:41,549][04801] Saving new best policy, reward=4.812! -[2024-09-01 06:41:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 471040. Throughput: 0: 212.8. Samples: 119078. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:41:42,055][00307] Avg episode reward: [(0, '4.812')] -[2024-09-01 06:41:47,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 475136. Throughput: 0: 219.0. Samples: 120080. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:41:47,054][00307] Avg episode reward: [(0, '4.810')] -[2024-09-01 06:41:52,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 479232. Throughput: 0: 225.2. Samples: 121456. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:52,062][00307] Avg episode reward: [(0, '4.817')] -[2024-09-01 06:41:55,244][04801] Saving new best policy, reward=4.817! -[2024-09-01 06:41:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 483328. Throughput: 0: 212.5. Samples: 122426. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:41:57,052][00307] Avg episode reward: [(0, '4.819')] -[2024-09-01 06:42:00,066][04801] Saving new best policy, reward=4.819! -[2024-09-01 06:42:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 487424. Throughput: 0: 215.3. Samples: 123204. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:42:02,052][00307] Avg episode reward: [(0, '4.882')] -[2024-09-01 06:42:03,931][04801] Saving new best policy, reward=4.882! -[2024-09-01 06:42:03,939][04814] Updated weights for policy 0, policy_version 120 (0.0059) -[2024-09-01 06:42:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 491520. Throughput: 0: 221.1. Samples: 124780. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:42:07,049][00307] Avg episode reward: [(0, '4.867')] -[2024-09-01 06:42:12,049][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 860.8). Total num frames: 495616. Throughput: 0: 218.7. Samples: 125882. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:42:12,059][00307] Avg episode reward: [(0, '4.933')] -[2024-09-01 06:42:12,088][00307] Components not started: RolloutWorker_w1, RolloutWorker_w5, RolloutWorker_w6, wait_time=600.1 seconds -[2024-09-01 06:42:14,299][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000122_499712.pth... -[2024-09-01 06:42:14,410][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000072_294912.pth -[2024-09-01 06:42:14,422][04801] Saving new best policy, reward=4.933! -[2024-09-01 06:42:17,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 499712. Throughput: 0: 212.5. Samples: 126414. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:42:17,055][00307] Avg episode reward: [(0, '4.976')] -[2024-09-01 06:42:22,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 503808. Throughput: 0: 217.6. Samples: 127820. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:42:22,049][00307] Avg episode reward: [(0, '4.923')] -[2024-09-01 06:42:22,577][04801] Saving new best policy, reward=4.976! -[2024-09-01 06:42:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 507904. Throughput: 0: 226.7. Samples: 129278. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:42:27,050][00307] Avg episode reward: [(0, '4.930')] -[2024-09-01 06:42:32,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 512000. Throughput: 0: 211.5. Samples: 129596. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:42:32,050][00307] Avg episode reward: [(0, '4.953')] -[2024-09-01 06:42:37,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 520192. Throughput: 0: 217.6. Samples: 131248. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:42:37,053][00307] Avg episode reward: [(0, '4.908')] -[2024-09-01 06:42:42,051][00307] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 524288. Throughput: 0: 223.8. Samples: 132498. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:42:42,059][00307] Avg episode reward: [(0, '4.918')] -[2024-09-01 06:42:47,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 524288. Throughput: 0: 219.6. Samples: 133084. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:42:47,050][00307] Avg episode reward: [(0, '4.826')] -[2024-09-01 06:42:51,731][04814] Updated weights for policy 0, policy_version 130 (0.1524) -[2024-09-01 06:42:52,046][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 532480. Throughput: 0: 215.2. Samples: 134464. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:42:52,049][00307] Avg episode reward: [(0, '4.746')] -[2024-09-01 06:42:57,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 536576. Throughput: 0: 220.7. Samples: 135812. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:42:57,049][00307] Avg episode reward: [(0, '4.652')] -[2024-09-01 06:43:02,054][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 860.8). Total num frames: 540672. Throughput: 0: 221.4. Samples: 136380. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:02,057][00307] Avg episode reward: [(0, '4.617')] -[2024-09-01 06:43:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 544768. Throughput: 0: 215.9. Samples: 137534. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:07,049][00307] Avg episode reward: [(0, '4.541')] -[2024-09-01 06:43:12,046][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 548864. Throughput: 0: 223.5. Samples: 139336. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:12,050][00307] Avg episode reward: [(0, '4.449')] -[2024-09-01 06:43:17,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 552960. Throughput: 0: 225.9. Samples: 139760. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:43:17,050][00307] Avg episode reward: [(0, '4.426')] -[2024-09-01 06:43:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 557056. Throughput: 0: 214.4. Samples: 140894. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:43:22,049][00307] Avg episode reward: [(0, '4.377')] -[2024-09-01 06:43:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 561152. Throughput: 0: 224.5. Samples: 142598. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:27,050][00307] Avg episode reward: [(0, '4.309')] -[2024-09-01 06:43:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 565248. Throughput: 0: 226.2. Samples: 143264. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:32,054][00307] Avg episode reward: [(0, '4.408')] -[2024-09-01 06:43:37,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 569344. Throughput: 0: 216.8. Samples: 144222. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:37,052][00307] Avg episode reward: [(0, '4.430')] -[2024-09-01 06:43:38,311][04814] Updated weights for policy 0, policy_version 140 (0.1026) -[2024-09-01 06:43:42,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 577536. Throughput: 0: 220.1. Samples: 145718. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:42,054][00307] Avg episode reward: [(0, '4.423')] -[2024-09-01 06:43:47,046][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 581632. Throughput: 0: 227.7. Samples: 146624. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:47,052][00307] Avg episode reward: [(0, '4.361')] -[2024-09-01 06:43:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 585728. Throughput: 0: 225.0. Samples: 147658. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:52,054][00307] Avg episode reward: [(0, '4.455')] -[2024-09-01 06:43:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 589824. Throughput: 0: 216.6. Samples: 149082. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:43:57,056][00307] Avg episode reward: [(0, '4.445')] -[2024-09-01 06:44:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 874.7). Total num frames: 593920. Throughput: 0: 221.6. Samples: 149734. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:02,053][00307] Avg episode reward: [(0, '4.538')] -[2024-09-01 06:44:07,050][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 598016. Throughput: 0: 225.7. Samples: 151050. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:07,052][00307] Avg episode reward: [(0, '4.551')] -[2024-09-01 06:44:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 602112. Throughput: 0: 218.0. Samples: 152406. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:12,054][00307] Avg episode reward: [(0, '4.597')] -[2024-09-01 06:44:14,054][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000148_606208.pth... -[2024-09-01 06:44:14,163][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000097_397312.pth -[2024-09-01 06:44:17,047][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 606208. Throughput: 0: 219.2. Samples: 153130. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:17,051][00307] Avg episode reward: [(0, '4.620')] -[2024-09-01 06:44:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 610304. Throughput: 0: 223.8. Samples: 154294. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:22,051][00307] Avg episode reward: [(0, '4.666')] -[2024-09-01 06:44:24,054][04814] Updated weights for policy 0, policy_version 150 (0.0556) -[2024-09-01 06:44:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 614400. Throughput: 0: 219.8. Samples: 155608. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:27,049][00307] Avg episode reward: [(0, '4.680')] -[2024-09-01 06:44:31,997][04801] Signal inference workers to stop experience collection... (150 times) -[2024-09-01 06:44:32,021][04814] InferenceWorker_p0-w0: stopping experience collection (150 times) -[2024-09-01 06:44:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 618496. Throughput: 0: 212.0. Samples: 156164. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:44:32,049][00307] Avg episode reward: [(0, '4.797')] -[2024-09-01 06:44:32,619][04801] Signal inference workers to resume experience collection... (150 times) -[2024-09-01 06:44:32,620][04814] InferenceWorker_p0-w0: resuming experience collection (150 times) -[2024-09-01 06:44:37,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 626688. Throughput: 0: 226.6. Samples: 157856. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:44:37,052][00307] Avg episode reward: [(0, '4.706')] -[2024-09-01 06:44:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 626688. Throughput: 0: 216.6. Samples: 158828. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:44:42,049][00307] Avg episode reward: [(0, '4.680')] -[2024-09-01 06:44:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 634880. Throughput: 0: 221.1. Samples: 159684. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:47,053][00307] Avg episode reward: [(0, '4.672')] -[2024-09-01 06:44:52,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 638976. Throughput: 0: 223.2. Samples: 161094. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:52,049][00307] Avg episode reward: [(0, '4.888')] -[2024-09-01 06:44:57,052][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 643072. Throughput: 0: 217.4. Samples: 162188. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:44:57,058][00307] Avg episode reward: [(0, '4.846')] -[2024-09-01 06:45:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 647168. Throughput: 0: 217.8. Samples: 162930. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:45:02,054][00307] Avg episode reward: [(0, '4.846')] -[2024-09-01 06:45:07,046][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 651264. Throughput: 0: 220.5. Samples: 164218. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:45:07,052][00307] Avg episode reward: [(0, '4.885')] -[2024-09-01 06:45:09,701][04814] Updated weights for policy 0, policy_version 160 (0.2669) -[2024-09-01 06:45:12,047][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 655360. Throughput: 0: 221.2. Samples: 165562. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:45:12,057][00307] Avg episode reward: [(0, '4.840')] -[2024-09-01 06:45:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 659456. Throughput: 0: 221.5. Samples: 166132. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:45:17,051][00307] Avg episode reward: [(0, '4.791')] -[2024-09-01 06:45:22,051][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 663552. Throughput: 0: 218.2. Samples: 167678. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:45:22,054][00307] Avg episode reward: [(0, '4.778')] -[2024-09-01 06:45:27,047][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 667648. Throughput: 0: 227.6. Samples: 169070. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:45:27,056][00307] Avg episode reward: [(0, '4.824')] -[2024-09-01 06:45:32,050][00307] Fps is (10 sec: 819.3, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 671744. Throughput: 0: 219.4. Samples: 169556. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:45:32,057][00307] Avg episode reward: [(0, '4.821')] -[2024-09-01 06:45:37,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 675840. Throughput: 0: 221.0. Samples: 171040. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:45:37,052][00307] Avg episode reward: [(0, '4.783')] -[2024-09-01 06:45:42,046][00307] Fps is (10 sec: 1229.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 684032. Throughput: 0: 228.2. Samples: 172456. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:45:42,049][00307] Avg episode reward: [(0, '4.838')] -[2024-09-01 06:45:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 684032. Throughput: 0: 225.5. Samples: 173076. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:45:47,049][00307] Avg episode reward: [(0, '4.829')] -[2024-09-01 06:45:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 692224. Throughput: 0: 223.4. Samples: 174270. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:45:52,049][00307] Avg episode reward: [(0, '4.742')] -[2024-09-01 06:45:56,256][04814] Updated weights for policy 0, policy_version 170 (0.1006) -[2024-09-01 06:45:57,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 696320. Throughput: 0: 224.4. Samples: 175662. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:45:57,048][00307] Avg episode reward: [(0, '4.725')] -[2024-09-01 06:46:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 700416. Throughput: 0: 228.5. Samples: 176416. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:02,053][00307] Avg episode reward: [(0, '4.843')] -[2024-09-01 06:46:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 704512. Throughput: 0: 217.5. Samples: 177464. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:07,048][00307] Avg episode reward: [(0, '4.881')] -[2024-09-01 06:46:12,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 708608. Throughput: 0: 219.7. Samples: 178956. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:46:12,056][00307] Avg episode reward: [(0, '4.863')] -[2024-09-01 06:46:13,922][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000174_712704.pth... -[2024-09-01 06:46:14,024][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000122_499712.pth -[2024-09-01 06:46:17,054][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 712704. Throughput: 0: 227.5. Samples: 179796. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:46:17,058][00307] Avg episode reward: [(0, '4.873')] -[2024-09-01 06:46:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 716800. Throughput: 0: 215.4. Samples: 180734. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:22,056][00307] Avg episode reward: [(0, '4.976')] -[2024-09-01 06:46:27,047][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 720896. Throughput: 0: 221.8. Samples: 182436. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:27,049][00307] Avg episode reward: [(0, '4.849')] -[2024-09-01 06:46:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 724992. Throughput: 0: 219.1. Samples: 182934. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:32,054][00307] Avg episode reward: [(0, '5.001')] -[2024-09-01 06:46:37,049][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 729088. Throughput: 0: 223.9. Samples: 184344. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:37,053][00307] Avg episode reward: [(0, '5.127')] -[2024-09-01 06:46:38,042][04801] Saving new best policy, reward=5.001! -[2024-09-01 06:46:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 733184. Throughput: 0: 221.7. Samples: 185640. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:42,049][00307] Avg episode reward: [(0, '5.156')] -[2024-09-01 06:46:42,425][04801] Saving new best policy, reward=5.127! -[2024-09-01 06:46:42,430][04814] Updated weights for policy 0, policy_version 180 (0.1015) -[2024-09-01 06:46:46,585][04801] Saving new best policy, reward=5.156! -[2024-09-01 06:46:47,046][00307] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 741376. Throughput: 0: 223.0. Samples: 186450. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:47,048][00307] Avg episode reward: [(0, '5.165')] -[2024-09-01 06:46:51,326][04801] Saving new best policy, reward=5.165! -[2024-09-01 06:46:52,051][00307] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 745472. Throughput: 0: 226.0. Samples: 187634. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:46:52,059][00307] Avg episode reward: [(0, '5.060')] -[2024-09-01 06:46:57,048][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 749568. Throughput: 0: 218.7. Samples: 188800. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:46:57,059][00307] Avg episode reward: [(0, '5.055')] -[2024-09-01 06:47:02,046][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 753664. Throughput: 0: 220.7. Samples: 189724. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:02,048][00307] Avg episode reward: [(0, '5.189')] -[2024-09-01 06:47:04,938][04801] Saving new best policy, reward=5.189! -[2024-09-01 06:47:07,047][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 757760. Throughput: 0: 230.2. Samples: 191092. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:47:07,054][00307] Avg episode reward: [(0, '5.182')] -[2024-09-01 06:47:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 761856. Throughput: 0: 212.8. Samples: 192010. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:47:12,053][00307] Avg episode reward: [(0, '5.134')] -[2024-09-01 06:47:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 765952. Throughput: 0: 219.8. Samples: 192824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:47:17,053][00307] Avg episode reward: [(0, '5.202')] -[2024-09-01 06:47:18,984][04801] Saving new best policy, reward=5.202! -[2024-09-01 06:47:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 770048. Throughput: 0: 223.7. Samples: 194408. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:47:22,057][00307] Avg episode reward: [(0, '5.121')] -[2024-09-01 06:47:27,050][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 774144. Throughput: 0: 218.8. Samples: 195486. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:27,055][00307] Avg episode reward: [(0, '5.019')] -[2024-09-01 06:47:29,651][04814] Updated weights for policy 0, policy_version 190 (0.0556) -[2024-09-01 06:47:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 778240. Throughput: 0: 208.5. Samples: 195832. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:32,048][00307] Avg episode reward: [(0, '5.043')] -[2024-09-01 06:47:37,046][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 782336. Throughput: 0: 225.5. Samples: 197780. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:37,055][00307] Avg episode reward: [(0, '5.087')] -[2024-09-01 06:47:42,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 786432. Throughput: 0: 226.0. Samples: 198968. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:42,049][00307] Avg episode reward: [(0, '5.065')] -[2024-09-01 06:47:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 790528. Throughput: 0: 214.3. Samples: 199368. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:47,048][00307] Avg episode reward: [(0, '5.110')] -[2024-09-01 06:47:52,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 798720. Throughput: 0: 218.6. Samples: 200928. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:52,051][00307] Avg episode reward: [(0, '5.071')] -[2024-09-01 06:47:57,048][00307] Fps is (10 sec: 1228.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 802816. Throughput: 0: 228.1. Samples: 202274. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:47:57,055][00307] Avg episode reward: [(0, '5.094')] -[2024-09-01 06:48:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 806912. Throughput: 0: 223.4. Samples: 202876. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:02,051][00307] Avg episode reward: [(0, '5.078')] -[2024-09-01 06:48:07,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 811008. Throughput: 0: 217.2. Samples: 204182. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:07,049][00307] Avg episode reward: [(0, '5.280')] -[2024-09-01 06:48:09,337][04801] Saving new best policy, reward=5.280! -[2024-09-01 06:48:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 815104. Throughput: 0: 229.7. Samples: 205820. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:12,050][00307] Avg episode reward: [(0, '5.254')] -[2024-09-01 06:48:14,491][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000200_819200.pth... -[2024-09-01 06:48:14,494][04814] Updated weights for policy 0, policy_version 200 (0.0039) -[2024-09-01 06:48:14,603][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000148_606208.pth -[2024-09-01 06:48:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 819200. Throughput: 0: 230.4. Samples: 206200. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:17,056][00307] Avg episode reward: [(0, '5.193')] -[2024-09-01 06:48:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 823296. Throughput: 0: 214.3. Samples: 207422. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:22,049][00307] Avg episode reward: [(0, '5.117')] -[2024-09-01 06:48:23,443][04801] Signal inference workers to stop experience collection... (200 times) -[2024-09-01 06:48:23,474][04814] InferenceWorker_p0-w0: stopping experience collection (200 times) -[2024-09-01 06:48:24,400][04801] Signal inference workers to resume experience collection... (200 times) -[2024-09-01 06:48:24,402][04814] InferenceWorker_p0-w0: resuming experience collection (200 times) -[2024-09-01 06:48:27,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 827392. Throughput: 0: 214.8. Samples: 208634. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:27,056][00307] Avg episode reward: [(0, '5.199')] -[2024-09-01 06:48:32,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 827392. Throughput: 0: 216.4. Samples: 209104. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:32,052][00307] Avg episode reward: [(0, '5.199')] -[2024-09-01 06:48:37,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 831488. Throughput: 0: 197.5. Samples: 209816. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:48:37,050][00307] Avg episode reward: [(0, '5.208')] -[2024-09-01 06:48:42,047][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 839680. Throughput: 0: 195.9. Samples: 211088. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:42,049][00307] Avg episode reward: [(0, '5.186')] -[2024-09-01 06:48:47,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 843776. Throughput: 0: 205.4. Samples: 212118. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:47,049][00307] Avg episode reward: [(0, '5.275')] -[2024-09-01 06:48:52,046][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 843776. Throughput: 0: 198.3. Samples: 213104. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:48:52,051][00307] Avg episode reward: [(0, '5.495')] -[2024-09-01 06:48:52,188][04801] Saving new best policy, reward=5.495! -[2024-09-01 06:48:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 851968. Throughput: 0: 190.7. Samples: 214400. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:48:57,050][00307] Avg episode reward: [(0, '5.653')] -[2024-09-01 06:49:00,295][04801] Saving new best policy, reward=5.653! -[2024-09-01 06:49:02,046][00307] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 856064. Throughput: 0: 199.1. Samples: 215160. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:49:02,049][00307] Avg episode reward: [(0, '5.555')] -[2024-09-01 06:49:06,043][04814] Updated weights for policy 0, policy_version 210 (0.2196) -[2024-09-01 06:49:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 860160. Throughput: 0: 199.0. Samples: 216376. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:49:07,053][00307] Avg episode reward: [(0, '5.485')] -[2024-09-01 06:49:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 864256. Throughput: 0: 198.8. Samples: 217580. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:49:12,048][00307] Avg episode reward: [(0, '5.439')] -[2024-09-01 06:49:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 868352. Throughput: 0: 205.4. Samples: 218346. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:49:17,053][00307] Avg episode reward: [(0, '5.471')] -[2024-09-01 06:49:22,047][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 872448. Throughput: 0: 221.1. Samples: 219766. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:49:22,050][00307] Avg episode reward: [(0, '5.398')] -[2024-09-01 06:49:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 876544. Throughput: 0: 217.3. Samples: 220866. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:49:27,050][00307] Avg episode reward: [(0, '5.473')] -[2024-09-01 06:49:32,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 880640. Throughput: 0: 209.3. Samples: 221538. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:49:32,049][00307] Avg episode reward: [(0, '5.506')] -[2024-09-01 06:49:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 884736. Throughput: 0: 227.5. Samples: 223340. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:49:37,053][00307] Avg episode reward: [(0, '5.353')] -[2024-09-01 06:49:42,049][00307] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 860.8). Total num frames: 888832. Throughput: 0: 221.7. Samples: 224378. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:49:42,058][00307] Avg episode reward: [(0, '5.220')] -[2024-09-01 06:49:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 892928. Throughput: 0: 217.0. Samples: 224926. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:49:47,051][00307] Avg episode reward: [(0, '5.088')] -[2024-09-01 06:49:51,577][04814] Updated weights for policy 0, policy_version 220 (0.2052) -[2024-09-01 06:49:52,046][00307] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 874.8). Total num frames: 901120. Throughput: 0: 228.8. Samples: 226672. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:49:52,049][00307] Avg episode reward: [(0, '5.236')] -[2024-09-01 06:49:57,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 905216. Throughput: 0: 216.0. Samples: 227302. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:49:57,053][00307] Avg episode reward: [(0, '5.256')] -[2024-09-01 06:50:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 909312. Throughput: 0: 219.1. Samples: 228206. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:02,049][00307] Avg episode reward: [(0, '5.168')] -[2024-09-01 06:50:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 913408. Throughput: 0: 219.0. Samples: 229620. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:07,051][00307] Avg episode reward: [(0, '5.149')] -[2024-09-01 06:50:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 917504. Throughput: 0: 220.7. Samples: 230798. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:12,051][00307] Avg episode reward: [(0, '5.023')] -[2024-09-01 06:50:16,706][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000225_921600.pth... -[2024-09-01 06:50:16,813][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000174_712704.pth -[2024-09-01 06:50:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 921600. Throughput: 0: 218.7. Samples: 231380. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:17,048][00307] Avg episode reward: [(0, '4.984')] -[2024-09-01 06:50:22,047][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 925696. Throughput: 0: 210.3. Samples: 232804. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:22,056][00307] Avg episode reward: [(0, '5.105')] -[2024-09-01 06:50:27,047][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 929792. Throughput: 0: 216.0. Samples: 234098. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:50:27,059][00307] Avg episode reward: [(0, '5.160')] -[2024-09-01 06:50:32,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 933888. Throughput: 0: 217.3. Samples: 234706. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:50:32,052][00307] Avg episode reward: [(0, '5.124')] -[2024-09-01 06:50:37,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 937984. Throughput: 0: 209.7. Samples: 236108. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:37,049][00307] Avg episode reward: [(0, '4.992')] -[2024-09-01 06:50:39,171][04814] Updated weights for policy 0, policy_version 230 (0.1174) -[2024-09-01 06:50:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 942080. Throughput: 0: 232.5. Samples: 237764. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:42,055][00307] Avg episode reward: [(0, '4.904')] -[2024-09-01 06:50:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 946176. Throughput: 0: 219.7. Samples: 238092. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:50:47,049][00307] Avg episode reward: [(0, '4.904')] -[2024-09-01 06:50:52,050][00307] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 860.8). Total num frames: 950272. Throughput: 0: 216.6. Samples: 239368. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:50:52,053][00307] Avg episode reward: [(0, '5.005')] -[2024-09-01 06:50:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 954368. Throughput: 0: 223.7. Samples: 240864. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:50:57,049][00307] Avg episode reward: [(0, '5.030')] -[2024-09-01 06:51:02,046][00307] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 958464. Throughput: 0: 224.1. Samples: 241464. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:51:02,052][00307] Avg episode reward: [(0, '5.118')] -[2024-09-01 06:51:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 962560. Throughput: 0: 217.5. Samples: 242590. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:07,050][00307] Avg episode reward: [(0, '5.036')] -[2024-09-01 06:51:12,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 970752. Throughput: 0: 219.5. Samples: 243974. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:51:12,050][00307] Avg episode reward: [(0, '5.049')] -[2024-09-01 06:51:17,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 974848. Throughput: 0: 228.3. Samples: 244980. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:51:17,051][00307] Avg episode reward: [(0, '4.990')] -[2024-09-01 06:51:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 978944. Throughput: 0: 221.0. Samples: 246054. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:51:22,049][00307] Avg episode reward: [(0, '5.003')] -[2024-09-01 06:51:26,156][04814] Updated weights for policy 0, policy_version 240 (0.1025) -[2024-09-01 06:51:27,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 983040. Throughput: 0: 209.0. Samples: 247168. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:51:27,055][00307] Avg episode reward: [(0, '5.055')] -[2024-09-01 06:51:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 987136. Throughput: 0: 222.9. Samples: 248122. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:32,048][00307] Avg episode reward: [(0, '5.109')] -[2024-09-01 06:51:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 991232. Throughput: 0: 222.1. Samples: 249360. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:37,052][00307] Avg episode reward: [(0, '5.128')] -[2024-09-01 06:51:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 995328. Throughput: 0: 214.4. Samples: 250512. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:42,056][00307] Avg episode reward: [(0, '5.177')] -[2024-09-01 06:51:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 999424. Throughput: 0: 217.5. Samples: 251250. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:47,049][00307] Avg episode reward: [(0, '5.111')] -[2024-09-01 06:51:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1003520. Throughput: 0: 225.8. Samples: 252752. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:52,050][00307] Avg episode reward: [(0, '5.037')] -[2024-09-01 06:51:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1007616. Throughput: 0: 217.6. Samples: 253766. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:51:57,049][00307] Avg episode reward: [(0, '4.948')] -[2024-09-01 06:52:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1011712. Throughput: 0: 213.0. Samples: 254564. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:52:02,053][00307] Avg episode reward: [(0, '4.961')] -[2024-09-01 06:52:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1015808. Throughput: 0: 224.5. Samples: 256158. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:52:07,050][00307] Avg episode reward: [(0, '5.023')] -[2024-09-01 06:52:12,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 1019904. Throughput: 0: 222.2. Samples: 257168. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:52:12,050][00307] Avg episode reward: [(0, '5.196')] -[2024-09-01 06:52:12,072][00307] Components not started: RolloutWorker_w1, RolloutWorker_w5, RolloutWorker_w6, wait_time=1200.0 seconds -[2024-09-01 06:52:13,055][04814] Updated weights for policy 0, policy_version 250 (0.0048) -[2024-09-01 06:52:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 1024000. Throughput: 0: 214.4. Samples: 257768. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:52:17,054][00307] Avg episode reward: [(0, '5.176')] -[2024-09-01 06:52:17,206][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000251_1028096.pth... -[2024-09-01 06:52:17,304][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000200_819200.pth -[2024-09-01 06:52:22,048][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1032192. Throughput: 0: 224.4. Samples: 259460. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:52:22,055][00307] Avg episode reward: [(0, '5.169')] -[2024-09-01 06:52:24,935][04801] Signal inference workers to stop experience collection... (250 times) -[2024-09-01 06:52:25,014][04814] InferenceWorker_p0-w0: stopping experience collection (250 times) -[2024-09-01 06:52:26,631][04801] Signal inference workers to resume experience collection... (250 times) -[2024-09-01 06:52:26,638][04814] InferenceWorker_p0-w0: resuming experience collection (250 times) -[2024-09-01 06:52:27,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1036288. Throughput: 0: 213.3. Samples: 260110. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:52:27,054][00307] Avg episode reward: [(0, '5.163')] -[2024-09-01 06:52:32,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1040384. Throughput: 0: 215.8. Samples: 260960. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:52:32,057][00307] Avg episode reward: [(0, '5.212')] -[2024-09-01 06:52:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1044480. Throughput: 0: 216.1. Samples: 262476. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:52:37,053][00307] Avg episode reward: [(0, '5.220')] -[2024-09-01 06:52:42,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1048576. Throughput: 0: 223.3. Samples: 263814. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:52:42,050][00307] Avg episode reward: [(0, '5.324')] -[2024-09-01 06:52:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1052672. Throughput: 0: 218.0. Samples: 264372. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:52:47,050][00307] Avg episode reward: [(0, '5.275')] -[2024-09-01 06:52:52,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1056768. Throughput: 0: 215.7. Samples: 265864. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:52:52,050][00307] Avg episode reward: [(0, '5.349')] -[2024-09-01 06:52:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1060864. Throughput: 0: 223.6. Samples: 267230. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:52:57,050][00307] Avg episode reward: [(0, '5.519')] -[2024-09-01 06:53:00,005][04814] Updated weights for policy 0, policy_version 260 (0.3423) -[2024-09-01 06:53:02,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1064960. Throughput: 0: 219.0. Samples: 267624. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:53:02,050][00307] Avg episode reward: [(0, '5.408')] -[2024-09-01 06:53:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1069056. Throughput: 0: 214.9. Samples: 269132. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:53:07,055][00307] Avg episode reward: [(0, '5.474')] -[2024-09-01 06:53:12,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 1077248. Throughput: 0: 231.2. Samples: 270516. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:53:12,051][00307] Avg episode reward: [(0, '5.451')] -[2024-09-01 06:53:17,055][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 860.8). Total num frames: 1077248. Throughput: 0: 224.7. Samples: 271074. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:53:17,065][00307] Avg episode reward: [(0, '5.599')] -[2024-09-01 06:53:22,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 1081344. Throughput: 0: 223.0. Samples: 272512. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:53:22,049][00307] Avg episode reward: [(0, '5.639')] -[2024-09-01 06:53:27,046][00307] Fps is (10 sec: 1229.9, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1089536. Throughput: 0: 220.1. Samples: 273720. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:53:27,049][00307] Avg episode reward: [(0, '5.708')] -[2024-09-01 06:53:31,658][04801] Saving new best policy, reward=5.708! -[2024-09-01 06:53:32,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1093632. Throughput: 0: 225.1. Samples: 274502. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:53:32,052][00307] Avg episode reward: [(0, '5.943')] -[2024-09-01 06:53:36,694][04801] Saving new best policy, reward=5.943! -[2024-09-01 06:53:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1097728. Throughput: 0: 216.7. Samples: 275616. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:53:37,048][00307] Avg episode reward: [(0, '5.927')] -[2024-09-01 06:53:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1101824. Throughput: 0: 217.8. Samples: 277030. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:53:42,054][00307] Avg episode reward: [(0, '5.882')] -[2024-09-01 06:53:45,010][04814] Updated weights for policy 0, policy_version 270 (0.0540) -[2024-09-01 06:53:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1105920. Throughput: 0: 225.6. Samples: 277778. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:53:47,055][00307] Avg episode reward: [(0, '5.654')] -[2024-09-01 06:53:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1110016. Throughput: 0: 216.1. Samples: 278856. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:53:52,051][00307] Avg episode reward: [(0, '5.703')] -[2024-09-01 06:53:57,047][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1114112. Throughput: 0: 216.9. Samples: 280276. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:53:57,053][00307] Avg episode reward: [(0, '5.742')] -[2024-09-01 06:54:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1118208. Throughput: 0: 221.2. Samples: 281026. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:54:02,049][00307] Avg episode reward: [(0, '5.816')] -[2024-09-01 06:54:07,050][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1122304. Throughput: 0: 216.9. Samples: 282274. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:54:07,056][00307] Avg episode reward: [(0, '5.825')] -[2024-09-01 06:54:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1126400. Throughput: 0: 220.7. Samples: 283650. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:54:12,049][00307] Avg episode reward: [(0, '5.706')] -[2024-09-01 06:54:13,705][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000276_1130496.pth... -[2024-09-01 06:54:13,815][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000225_921600.pth -[2024-09-01 06:54:17,046][00307] Fps is (10 sec: 819.5, 60 sec: 887.6, 300 sec: 874.7). Total num frames: 1130496. Throughput: 0: 219.7. Samples: 284390. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:54:17,059][00307] Avg episode reward: [(0, '5.779')] -[2024-09-01 06:54:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1134592. Throughput: 0: 221.4. Samples: 285578. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:22,054][00307] Avg episode reward: [(0, '5.752')] -[2024-09-01 06:54:27,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1138688. Throughput: 0: 219.9. Samples: 286924. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:27,055][00307] Avg episode reward: [(0, '5.973')] -[2024-09-01 06:54:31,915][04801] Saving new best policy, reward=5.973! -[2024-09-01 06:54:31,921][04814] Updated weights for policy 0, policy_version 280 (0.0048) -[2024-09-01 06:54:32,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1146880. Throughput: 0: 218.8. Samples: 287624. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:32,049][00307] Avg episode reward: [(0, '6.098')] -[2024-09-01 06:54:36,132][04801] Saving new best policy, reward=6.098! -[2024-09-01 06:54:37,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1150976. Throughput: 0: 224.0. Samples: 288936. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:37,056][00307] Avg episode reward: [(0, '6.125')] -[2024-09-01 06:54:42,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1150976. Throughput: 0: 216.4. Samples: 290014. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:42,049][00307] Avg episode reward: [(0, '6.202')] -[2024-09-01 06:54:42,373][04801] Saving new best policy, reward=6.125! -[2024-09-01 06:54:46,303][04801] Saving new best policy, reward=6.202! -[2024-09-01 06:54:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1159168. Throughput: 0: 219.0. Samples: 290882. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:47,053][00307] Avg episode reward: [(0, '6.009')] -[2024-09-01 06:54:52,048][00307] Fps is (10 sec: 1228.7, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1163264. Throughput: 0: 220.4. Samples: 292190. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:52,051][00307] Avg episode reward: [(0, '6.051')] -[2024-09-01 06:54:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1167360. Throughput: 0: 212.4. Samples: 293206. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:54:57,049][00307] Avg episode reward: [(0, '5.880')] -[2024-09-01 06:55:02,046][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1171456. Throughput: 0: 213.8. Samples: 294010. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:55:02,050][00307] Avg episode reward: [(0, '5.998')] -[2024-09-01 06:55:07,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1175552. Throughput: 0: 219.4. Samples: 295452. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:55:07,050][00307] Avg episode reward: [(0, '6.144')] -[2024-09-01 06:55:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1179648. Throughput: 0: 214.3. Samples: 296566. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:55:12,049][00307] Avg episode reward: [(0, '6.232')] -[2024-09-01 06:55:15,481][04801] Saving new best policy, reward=6.232! -[2024-09-01 06:55:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1183744. Throughput: 0: 211.4. Samples: 297138. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:55:17,049][00307] Avg episode reward: [(0, '6.224')] -[2024-09-01 06:55:19,221][04814] Updated weights for policy 0, policy_version 290 (0.1680) -[2024-09-01 06:55:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1187840. Throughput: 0: 217.4. Samples: 298718. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:55:22,049][00307] Avg episode reward: [(0, '5.844')] -[2024-09-01 06:55:27,052][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1191936. Throughput: 0: 225.6. Samples: 300168. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:55:27,057][00307] Avg episode reward: [(0, '6.150')] -[2024-09-01 06:55:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1196032. Throughput: 0: 212.4. Samples: 300438. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:55:32,054][00307] Avg episode reward: [(0, '6.086')] -[2024-09-01 06:55:37,047][00307] Fps is (10 sec: 819.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1200128. Throughput: 0: 217.4. Samples: 301972. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:55:37,050][00307] Avg episode reward: [(0, '6.188')] -[2024-09-01 06:55:42,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1208320. Throughput: 0: 225.4. Samples: 303348. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:55:42,054][00307] Avg episode reward: [(0, '6.515')] -[2024-09-01 06:55:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1208320. Throughput: 0: 220.5. Samples: 303932. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:55:47,053][00307] Avg episode reward: [(0, '6.549')] -[2024-09-01 06:55:48,061][04801] Saving new best policy, reward=6.515! -[2024-09-01 06:55:48,186][04801] Saving new best policy, reward=6.549! -[2024-09-01 06:55:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1216512. Throughput: 0: 219.8. Samples: 305342. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:55:52,053][00307] Avg episode reward: [(0, '6.582')] -[2024-09-01 06:55:56,102][04801] Saving new best policy, reward=6.582! -[2024-09-01 06:55:57,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1220608. Throughput: 0: 227.8. Samples: 306818. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:55:57,054][00307] Avg episode reward: [(0, '6.378')] -[2024-09-01 06:56:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1224704. Throughput: 0: 226.2. Samples: 307318. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:56:02,049][00307] Avg episode reward: [(0, '6.353')] -[2024-09-01 06:56:06,179][04814] Updated weights for policy 0, policy_version 300 (0.0553) -[2024-09-01 06:56:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1228800. Throughput: 0: 219.1. Samples: 308578. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:56:07,048][00307] Avg episode reward: [(0, '6.458')] -[2024-09-01 06:56:12,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1232896. Throughput: 0: 220.2. Samples: 310074. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:56:12,052][00307] Avg episode reward: [(0, '6.456')] -[2024-09-01 06:56:14,443][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000302_1236992.pth... -[2024-09-01 06:56:14,538][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000251_1028096.pth -[2024-09-01 06:56:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1236992. Throughput: 0: 227.2. Samples: 310664. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:56:17,049][00307] Avg episode reward: [(0, '6.411')] -[2024-09-01 06:56:19,148][04801] Signal inference workers to stop experience collection... (300 times) -[2024-09-01 06:56:19,181][04814] InferenceWorker_p0-w0: stopping experience collection (300 times) -[2024-09-01 06:56:20,208][04801] Signal inference workers to resume experience collection... (300 times) -[2024-09-01 06:56:20,212][04814] InferenceWorker_p0-w0: resuming experience collection (300 times) -[2024-09-01 06:56:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1241088. Throughput: 0: 212.3. Samples: 311524. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:56:22,051][00307] Avg episode reward: [(0, '6.587')] -[2024-09-01 06:56:24,425][04801] Saving new best policy, reward=6.587! -[2024-09-01 06:56:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1245184. Throughput: 0: 224.6. Samples: 313454. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 06:56:27,057][00307] Avg episode reward: [(0, '6.699')] -[2024-09-01 06:56:32,050][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1249280. Throughput: 0: 224.3. Samples: 314028. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:56:32,058][00307] Avg episode reward: [(0, '6.992')] -[2024-09-01 06:56:34,074][04801] Saving new best policy, reward=6.699! -[2024-09-01 06:56:34,260][04801] Saving new best policy, reward=6.992! -[2024-09-01 06:56:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1253376. Throughput: 0: 215.2. Samples: 315024. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:56:37,051][00307] Avg episode reward: [(0, '6.976')] -[2024-09-01 06:56:42,046][00307] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1257472. Throughput: 0: 218.2. Samples: 316638. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:56:42,057][00307] Avg episode reward: [(0, '6.993')] -[2024-09-01 06:56:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1261568. Throughput: 0: 218.5. Samples: 317150. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:56:47,049][00307] Avg episode reward: [(0, '6.949')] -[2024-09-01 06:56:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1265664. Throughput: 0: 220.9. Samples: 318518. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:56:52,050][00307] Avg episode reward: [(0, '6.895')] -[2024-09-01 06:56:52,786][04814] Updated weights for policy 0, policy_version 310 (0.1056) -[2024-09-01 06:56:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1269760. Throughput: 0: 216.8. Samples: 319830. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:56:57,056][00307] Avg episode reward: [(0, '7.210')] -[2024-09-01 06:57:01,060][04801] Saving new best policy, reward=7.210! -[2024-09-01 06:57:02,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1277952. Throughput: 0: 223.6. Samples: 320728. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:57:02,051][00307] Avg episode reward: [(0, '7.062')] -[2024-09-01 06:57:07,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1282048. Throughput: 0: 228.3. Samples: 321796. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:57:07,050][00307] Avg episode reward: [(0, '7.017')] -[2024-09-01 06:57:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1286144. Throughput: 0: 213.2. Samples: 323050. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:57:12,048][00307] Avg episode reward: [(0, '7.259')] -[2024-09-01 06:57:15,192][04801] Saving new best policy, reward=7.259! -[2024-09-01 06:57:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1290240. Throughput: 0: 216.9. Samples: 323786. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:57:17,056][00307] Avg episode reward: [(0, '7.402')] -[2024-09-01 06:57:19,885][04801] Saving new best policy, reward=7.402! -[2024-09-01 06:57:22,049][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1294336. Throughput: 0: 224.7. Samples: 325134. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:57:22,052][00307] Avg episode reward: [(0, '7.251')] -[2024-09-01 06:57:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1298432. Throughput: 0: 212.8. Samples: 326212. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:27,051][00307] Avg episode reward: [(0, '6.844')] -[2024-09-01 06:57:32,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1302528. Throughput: 0: 220.0. Samples: 327052. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:32,048][00307] Avg episode reward: [(0, '6.837')] -[2024-09-01 06:57:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1306624. Throughput: 0: 224.0. Samples: 328600. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:37,055][00307] Avg episode reward: [(0, '6.744')] -[2024-09-01 06:57:39,387][04814] Updated weights for policy 0, policy_version 320 (0.0064) -[2024-09-01 06:57:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1310720. Throughput: 0: 217.2. Samples: 329602. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:42,050][00307] Avg episode reward: [(0, '6.713')] -[2024-09-01 06:57:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1314816. Throughput: 0: 206.8. Samples: 330036. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:47,049][00307] Avg episode reward: [(0, '7.126')] -[2024-09-01 06:57:52,046][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1323008. Throughput: 0: 226.4. Samples: 331986. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:52,051][00307] Avg episode reward: [(0, '6.927')] -[2024-09-01 06:57:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1323008. Throughput: 0: 221.4. Samples: 333014. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:57:57,053][00307] Avg episode reward: [(0, '7.361')] -[2024-09-01 06:58:02,046][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1327104. Throughput: 0: 217.6. Samples: 333580. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:58:02,055][00307] Avg episode reward: [(0, '7.348')] -[2024-09-01 06:58:07,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1335296. Throughput: 0: 220.6. Samples: 335062. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:58:07,049][00307] Avg episode reward: [(0, '7.589')] -[2024-09-01 06:58:11,578][04801] Saving new best policy, reward=7.589! -[2024-09-01 06:58:12,048][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1339392. Throughput: 0: 225.2. Samples: 336346. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:58:12,054][00307] Avg episode reward: [(0, '7.528')] -[2024-09-01 06:58:16,685][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000328_1343488.pth... -[2024-09-01 06:58:16,787][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000276_1130496.pth -[2024-09-01 06:58:17,049][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1343488. Throughput: 0: 219.0. Samples: 336906. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:58:17,052][00307] Avg episode reward: [(0, '7.362')] -[2024-09-01 06:58:22,047][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1347584. Throughput: 0: 216.0. Samples: 338322. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:58:22,055][00307] Avg episode reward: [(0, '7.383')] -[2024-09-01 06:58:24,819][04814] Updated weights for policy 0, policy_version 330 (0.1038) -[2024-09-01 06:58:27,049][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1351680. Throughput: 0: 223.5. Samples: 339660. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:58:27,060][00307] Avg episode reward: [(0, '7.867')] -[2024-09-01 06:58:30,696][04801] Saving new best policy, reward=7.867! -[2024-09-01 06:58:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1355776. Throughput: 0: 226.5. Samples: 340228. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:58:32,050][00307] Avg episode reward: [(0, '8.014')] -[2024-09-01 06:58:36,976][04801] Saving new best policy, reward=8.014! -[2024-09-01 06:58:37,048][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1359872. Throughput: 0: 208.6. Samples: 341374. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:58:37,055][00307] Avg episode reward: [(0, '7.982')] -[2024-09-01 06:58:42,047][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 1359872. Throughput: 0: 205.7. Samples: 342272. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:58:42,054][00307] Avg episode reward: [(0, '8.018')] -[2024-09-01 06:58:47,047][00307] Fps is (10 sec: 0.0, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 1359872. Throughput: 0: 193.2. Samples: 342276. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:58:47,061][00307] Avg episode reward: [(0, '8.018')] -[2024-09-01 06:58:52,046][00307] Fps is (10 sec: 409.6, 60 sec: 682.7, 300 sec: 847.0). Total num frames: 1363968. Throughput: 0: 168.4. Samples: 342640. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:58:52,049][00307] Avg episode reward: [(0, '8.154')] -[2024-09-01 06:58:54,640][04801] Saving new best policy, reward=8.018! -[2024-09-01 06:58:54,772][04801] Saving new best policy, reward=8.154! -[2024-09-01 06:58:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 1368064. Throughput: 0: 174.9. Samples: 344218. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:58:57,052][00307] Avg episode reward: [(0, '8.018')] -[2024-09-01 06:59:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 1372160. Throughput: 0: 173.7. Samples: 344720. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:59:02,055][00307] Avg episode reward: [(0, '7.931')] -[2024-09-01 06:59:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 847.0). Total num frames: 1376256. Throughput: 0: 168.0. Samples: 345880. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:59:07,054][00307] Avg episode reward: [(0, '7.894')] -[2024-09-01 06:59:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 847.0). Total num frames: 1380352. Throughput: 0: 171.4. Samples: 347372. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:59:12,057][00307] Avg episode reward: [(0, '7.882')] -[2024-09-01 06:59:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 847.0). Total num frames: 1384448. Throughput: 0: 170.8. Samples: 347914. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:59:17,056][00307] Avg episode reward: [(0, '7.925')] -[2024-09-01 06:59:21,796][04814] Updated weights for policy 0, policy_version 340 (0.1224) -[2024-09-01 06:59:22,047][00307] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 1392640. Throughput: 0: 177.9. Samples: 349380. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:59:22,056][00307] Avg episode reward: [(0, '7.947')] -[2024-09-01 06:59:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 833.1). Total num frames: 1392640. Throughput: 0: 184.8. Samples: 350586. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:59:27,054][00307] Avg episode reward: [(0, '8.102')] -[2024-09-01 06:59:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 1400832. Throughput: 0: 202.8. Samples: 351400. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:59:32,049][00307] Avg episode reward: [(0, '8.073')] -[2024-09-01 06:59:37,046][00307] Fps is (10 sec: 1228.8, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 1404928. Throughput: 0: 221.1. Samples: 352590. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:59:37,055][00307] Avg episode reward: [(0, '7.779')] -[2024-09-01 06:59:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 1409024. Throughput: 0: 210.1. Samples: 353672. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 06:59:42,054][00307] Avg episode reward: [(0, '7.939')] -[2024-09-01 06:59:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1413120. Throughput: 0: 218.6. Samples: 354556. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:59:47,049][00307] Avg episode reward: [(0, '8.129')] -[2024-09-01 06:59:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1417216. Throughput: 0: 226.9. Samples: 356090. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 06:59:52,048][00307] Avg episode reward: [(0, '8.087')] -[2024-09-01 06:59:57,047][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1421312. Throughput: 0: 217.2. Samples: 357146. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 06:59:57,056][00307] Avg episode reward: [(0, '8.373')] -[2024-09-01 07:00:00,005][04801] Saving new best policy, reward=8.373! -[2024-09-01 07:00:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1425408. Throughput: 0: 219.9. Samples: 357810. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 07:00:02,050][00307] Avg episode reward: [(0, '8.411')] -[2024-09-01 07:00:03,775][04801] Saving new best policy, reward=8.411! -[2024-09-01 07:00:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1429504. Throughput: 0: 219.2. Samples: 359246. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:00:07,048][00307] Avg episode reward: [(0, '8.428')] -[2024-09-01 07:00:08,161][04801] Saving new best policy, reward=8.428! -[2024-09-01 07:00:08,167][04814] Updated weights for policy 0, policy_version 350 (0.0702) -[2024-09-01 07:00:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1433600. Throughput: 0: 221.3. Samples: 360544. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:00:12,050][00307] Avg episode reward: [(0, '8.193')] -[2024-09-01 07:00:17,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1437696. Throughput: 0: 208.9. Samples: 360800. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 07:00:17,050][00307] Avg episode reward: [(0, '8.293')] -[2024-09-01 07:00:18,400][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000352_1441792.pth... -[2024-09-01 07:00:18,503][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000302_1236992.pth -[2024-09-01 07:00:21,483][04801] Signal inference workers to stop experience collection... (350 times) -[2024-09-01 07:00:21,510][04814] InferenceWorker_p0-w0: stopping experience collection (350 times) -[2024-09-01 07:00:22,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 1441792. Throughput: 0: 221.4. Samples: 362554. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 07:00:22,051][00307] Avg episode reward: [(0, '8.300')] -[2024-09-01 07:00:22,571][04801] Signal inference workers to resume experience collection... (350 times) -[2024-09-01 07:00:22,572][04814] InferenceWorker_p0-w0: resuming experience collection (350 times) -[2024-09-01 07:00:27,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1445888. Throughput: 0: 225.2. Samples: 363806. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 07:00:27,054][00307] Avg episode reward: [(0, '8.277')] -[2024-09-01 07:00:32,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 1449984. Throughput: 0: 215.8. Samples: 364268. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 07:00:32,049][00307] Avg episode reward: [(0, '8.523')] -[2024-09-01 07:00:37,047][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1454080. Throughput: 0: 212.9. Samples: 365672. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:00:37,059][00307] Avg episode reward: [(0, '8.631')] -[2024-09-01 07:00:37,099][04801] Saving new best policy, reward=8.523! -[2024-09-01 07:00:41,508][04801] Saving new best policy, reward=8.631! -[2024-09-01 07:00:42,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1462272. Throughput: 0: 221.9. Samples: 367130. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 07:00:42,054][00307] Avg episode reward: [(0, '8.381')] -[2024-09-01 07:00:47,046][00307] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1462272. Throughput: 0: 218.3. Samples: 367632. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 07:00:47,050][00307] Avg episode reward: [(0, '8.580')] -[2024-09-01 07:00:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1470464. Throughput: 0: 211.8. Samples: 368776. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:00:52,049][00307] Avg episode reward: [(0, '8.550')] -[2024-09-01 07:00:56,321][04814] Updated weights for policy 0, policy_version 360 (0.0549) -[2024-09-01 07:00:57,046][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1474560. Throughput: 0: 212.8. Samples: 370122. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:00:57,049][00307] Avg episode reward: [(0, '8.633')] -[2024-09-01 07:01:01,331][04801] Saving new best policy, reward=8.633! -[2024-09-01 07:01:02,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1478656. Throughput: 0: 224.4. Samples: 370896. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 07:01:02,051][00307] Avg episode reward: [(0, '8.621')] -[2024-09-01 07:01:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1482752. Throughput: 0: 207.6. Samples: 371894. Policy #0 lag: (min: 1.0, avg: 1.1, max: 2.0) -[2024-09-01 07:01:07,057][00307] Avg episode reward: [(0, '8.660')] -[2024-09-01 07:01:10,674][04801] Saving new best policy, reward=8.660! -[2024-09-01 07:01:12,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1486848. Throughput: 0: 212.4. Samples: 373364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:01:12,048][00307] Avg episode reward: [(0, '8.685')] -[2024-09-01 07:01:14,812][04801] Saving new best policy, reward=8.685! -[2024-09-01 07:01:17,049][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 847.0). Total num frames: 1490944. Throughput: 0: 216.7. Samples: 374020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:01:17,052][00307] Avg episode reward: [(0, '8.791')] -[2024-09-01 07:01:21,392][04801] Saving new best policy, reward=8.791! -[2024-09-01 07:01:22,048][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 847.0). Total num frames: 1495040. Throughput: 0: 212.7. Samples: 375244. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 07:01:22,054][00307] Avg episode reward: [(0, '9.080')] -[2024-09-01 07:01:25,429][04801] Saving new best policy, reward=9.080! -[2024-09-01 07:01:27,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1499136. Throughput: 0: 207.5. Samples: 376466. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 07:01:27,049][00307] Avg episode reward: [(0, '9.598')] -[2024-09-01 07:01:29,870][04801] Saving new best policy, reward=9.598! -[2024-09-01 07:01:32,046][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1503232. Throughput: 0: 213.2. Samples: 377224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:01:32,049][00307] Avg episode reward: [(0, '9.573')] -[2024-09-01 07:01:37,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1507328. Throughput: 0: 211.9. Samples: 378312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:01:37,053][00307] Avg episode reward: [(0, '9.372')] -[2024-09-01 07:01:42,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 1511424. Throughput: 0: 211.8. Samples: 379654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:01:42,050][00307] Avg episode reward: [(0, '9.218')] -[2024-09-01 07:01:45,015][04814] Updated weights for policy 0, policy_version 370 (0.1080) -[2024-09-01 07:01:47,046][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1515520. Throughput: 0: 208.5. Samples: 380280. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:01:47,051][00307] Avg episode reward: [(0, '9.132')] -[2024-09-01 07:01:52,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.0). Total num frames: 1519616. Throughput: 0: 220.1. Samples: 381800. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:01:52,053][00307] Avg episode reward: [(0, '9.043')] -[2024-09-01 07:01:57,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1523712. Throughput: 0: 211.6. Samples: 382884. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:01:57,055][00307] Avg episode reward: [(0, '9.055')] -[2024-09-01 07:02:02,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1527808. Throughput: 0: 211.6. Samples: 383542. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:02:02,050][00307] Avg episode reward: [(0, '9.068')] -[2024-09-01 07:02:07,046][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1531904. Throughput: 0: 222.9. Samples: 385274. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:02:07,052][00307] Avg episode reward: [(0, '9.133')] -[2024-09-01 07:02:12,047][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 833.1). Total num frames: 1536000. Throughput: 0: 210.8. Samples: 385952. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:02:12,051][00307] Avg episode reward: [(0, '9.096')] -[2024-09-01 07:02:12,061][00307] Components not started: RolloutWorker_w1, RolloutWorker_w5, RolloutWorker_w6, wait_time=1800.0 seconds -[2024-09-01 07:02:12,069][00307] Components take too long to start: RolloutWorker_w1, RolloutWorker_w5, RolloutWorker_w6. Aborting the experiment! - - - -[2024-09-01 07:02:12,075][04801] Stopping Batcher_0... -[2024-09-01 07:02:12,075][04801] Loop batcher_evt_loop terminating... -[2024-09-01 07:02:12,078][00307] Component Batcher_0 stopped! -[2024-09-01 07:02:12,087][00307] Component RolloutWorker_w1 process died already! Don't wait for it. -[2024-09-01 07:02:12,092][00307] Component RolloutWorker_w5 process died already! Don't wait for it. -[2024-09-01 07:02:12,100][00307] Component RolloutWorker_w6 process died already! Don't wait for it. -[2024-09-01 07:02:12,106][00307] Waiting for ['LearnerWorker_p0', 'InferenceWorker_p0-w0', 'RolloutWorker_w0', 'RolloutWorker_w2', 'RolloutWorker_w3', 'RolloutWorker_w4', 'RolloutWorker_w7'] to stop... -[2024-09-01 07:02:12,264][04814] Weights refcount: 2 0 -[2024-09-01 07:02:12,274][00307] Component InferenceWorker_p0-w0 stopped! -[2024-09-01 07:02:12,281][00307] Waiting for ['LearnerWorker_p0', 'RolloutWorker_w0', 'RolloutWorker_w2', 'RolloutWorker_w3', 'RolloutWorker_w4', 'RolloutWorker_w7'] to stop... -[2024-09-01 07:02:12,302][04814] Stopping InferenceWorker_p0-w0... -[2024-09-01 07:02:12,308][04814] Loop inference_proc0-0_evt_loop terminating... -[2024-09-01 07:02:12,691][04822] Stopping RolloutWorker_w7... -[2024-09-01 07:02:12,690][00307] Component RolloutWorker_w7 stopped! -[2024-09-01 07:02:12,694][00307] Waiting for ['LearnerWorker_p0', 'RolloutWorker_w0', 'RolloutWorker_w2', 'RolloutWorker_w3', 'RolloutWorker_w4'] to stop... -[2024-09-01 07:02:12,692][04822] Loop rollout_proc7_evt_loop terminating... -[2024-09-01 07:02:12,746][00307] Component RolloutWorker_w2 stopped! -[2024-09-01 07:02:12,749][00307] Waiting for ['LearnerWorker_p0', 'RolloutWorker_w0', 'RolloutWorker_w3', 'RolloutWorker_w4'] to stop... -[2024-09-01 07:02:12,748][04818] Stopping RolloutWorker_w3... -[2024-09-01 07:02:12,754][04818] Loop rollout_proc3_evt_loop terminating... -[2024-09-01 07:02:12,759][04816] Stopping RolloutWorker_w2... -[2024-09-01 07:02:12,760][04816] Loop rollout_proc2_evt_loop terminating... -[2024-09-01 07:02:12,752][00307] Component RolloutWorker_w3 stopped! -[2024-09-01 07:02:12,763][00307] Waiting for ['LearnerWorker_p0', 'RolloutWorker_w0', 'RolloutWorker_w4'] to stop... -[2024-09-01 07:02:12,783][00307] Component RolloutWorker_w4 stopped! -[2024-09-01 07:02:12,790][04819] Stopping RolloutWorker_w4... -[2024-09-01 07:02:12,791][04819] Loop rollout_proc4_evt_loop terminating... -[2024-09-01 07:02:12,785][00307] Waiting for ['LearnerWorker_p0', 'RolloutWorker_w0'] to stop... -[2024-09-01 07:02:12,828][00307] Component RolloutWorker_w0 stopped! -[2024-09-01 07:02:12,831][00307] Waiting for ['LearnerWorker_p0'] to stop... -[2024-09-01 07:02:12,837][04815] Stopping RolloutWorker_w0... -[2024-09-01 07:02:12,838][04815] Loop rollout_proc0_evt_loop terminating... -[2024-09-01 07:02:13,369][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:02:13,491][04801] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000328_1343488.pth -[2024-09-01 07:02:13,508][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:02:13,686][04801] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:02:13,857][04801] Stopping LearnerWorker_p0... -[2024-09-01 07:02:13,857][04801] Loop learner_proc0_evt_loop terminating... -[2024-09-01 07:02:13,859][00307] Component LearnerWorker_p0 stopped! -[2024-09-01 07:02:13,867][00307] Waiting for process learner_proc0 to stop... -[2024-09-01 07:02:14,692][00307] Waiting for process inference_proc0-0 to join... -[2024-09-01 07:02:14,704][00307] Waiting for process rollout_proc0 to join... -[2024-09-01 07:02:15,571][00307] Waiting for process rollout_proc1 to join... -[2024-09-01 07:02:15,574][00307] Waiting for process rollout_proc2 to join... -[2024-09-01 07:02:15,578][00307] Waiting for process rollout_proc3 to join... -[2024-09-01 07:02:15,587][00307] Waiting for process rollout_proc4 to join... -[2024-09-01 07:02:15,592][00307] Waiting for process rollout_proc5 to join... -[2024-09-01 07:02:15,594][00307] Waiting for process rollout_proc6 to join... -[2024-09-01 07:02:15,598][00307] Waiting for process rollout_proc7 to join... -[2024-09-01 07:02:15,601][00307] Batcher 0 profile tree view: -batching: 7.2864, releasing_batches: 0.1124 -[2024-09-01 07:02:15,607][00307] InferenceWorker_p0-w0 profile tree view: +[2024-09-01 14:50:33,777][03040] Worker 5 uses CPU cores [1] +[2024-09-01 14:50:34,292][03021] Using optimizer +[2024-09-01 14:50:34,293][03021] No checkpoints found +[2024-09-01 14:50:34,294][03021] Did not load from checkpoint, starting from scratch! +[2024-09-01 14:50:34,294][03021] Initialized policy 0 weights for model version 0 +[2024-09-01 14:50:34,297][03021] LearnerWorker_p0 finished initialization! +[2024-09-01 14:50:34,305][03034] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 14:50:34,307][03034] RunningMeanStd input shape: (1,) +[2024-09-01 14:50:34,333][03034] ConvEncoder: input_channels=3 +[2024-09-01 14:50:34,490][03034] Conv encoder output size: 512 +[2024-09-01 14:50:34,490][03034] Policy head output size: 512 +[2024-09-01 14:50:34,512][00194] Inference worker 0-0 is ready! +[2024-09-01 14:50:34,514][00194] All inference workers are ready! Signal rollout workers to start! +[2024-09-01 14:50:34,598][03038] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,599][03040] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,601][03042] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,597][03036] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,613][03035] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,610][03039] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,625][03037] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:34,627][03041] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 14:50:35,136][00194] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 14:50:35,670][03037] Decorrelating experience for 0 frames... +[2024-09-01 14:50:36,360][03038] Decorrelating experience for 0 frames... +[2024-09-01 14:50:36,356][03040] Decorrelating experience for 0 frames... +[2024-09-01 14:50:36,363][03042] Decorrelating experience for 0 frames... +[2024-09-01 14:50:36,364][03036] Decorrelating experience for 0 frames... +[2024-09-01 14:50:36,675][03037] Decorrelating experience for 32 frames... +[2024-09-01 14:50:36,762][03041] Decorrelating experience for 0 frames... +[2024-09-01 14:50:37,247][03038] Decorrelating experience for 32 frames... +[2024-09-01 14:50:37,249][03036] Decorrelating experience for 32 frames... +[2024-09-01 14:50:37,842][03042] Decorrelating experience for 32 frames... +[2024-09-01 14:50:37,948][03039] Decorrelating experience for 0 frames... +[2024-09-01 14:50:38,398][03041] Decorrelating experience for 32 frames... +[2024-09-01 14:50:38,595][03037] Decorrelating experience for 64 frames... +[2024-09-01 14:50:38,817][00194] Heartbeat connected on Batcher_0 +[2024-09-01 14:50:38,824][00194] Heartbeat connected on LearnerWorker_p0 +[2024-09-01 14:50:38,886][00194] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-01 14:50:39,168][03036] Decorrelating experience for 64 frames... +[2024-09-01 14:50:39,403][03035] Decorrelating experience for 0 frames... +[2024-09-01 14:50:39,515][03042] Decorrelating experience for 64 frames... +[2024-09-01 14:50:40,136][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 14:50:40,414][03039] Decorrelating experience for 32 frames... +[2024-09-01 14:50:40,550][03038] Decorrelating experience for 64 frames... +[2024-09-01 14:50:41,222][03037] Decorrelating experience for 96 frames... +[2024-09-01 14:50:41,279][03040] Decorrelating experience for 32 frames... +[2024-09-01 14:50:41,444][03041] Decorrelating experience for 64 frames... +[2024-09-01 14:50:41,490][03042] Decorrelating experience for 96 frames... +[2024-09-01 14:50:41,575][00194] Heartbeat connected on RolloutWorker_w2 +[2024-09-01 14:50:41,671][03035] Decorrelating experience for 32 frames... +[2024-09-01 14:50:41,777][00194] Heartbeat connected on RolloutWorker_w7 +[2024-09-01 14:50:42,279][03038] Decorrelating experience for 96 frames... +[2024-09-01 14:50:42,726][03039] Decorrelating experience for 64 frames... +[2024-09-01 14:50:42,873][00194] Heartbeat connected on RolloutWorker_w3 +[2024-09-01 14:50:43,857][03036] Decorrelating experience for 96 frames... +[2024-09-01 14:50:44,758][00194] Heartbeat connected on RolloutWorker_w1 +[2024-09-01 14:50:45,136][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 39.4. Samples: 394. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 14:50:45,144][00194] Avg episode reward: [(0, '1.813')] +[2024-09-01 14:50:46,376][03041] Decorrelating experience for 96 frames... +[2024-09-01 14:50:46,844][03039] Decorrelating experience for 96 frames... +[2024-09-01 14:50:47,585][00194] Heartbeat connected on RolloutWorker_w6 +[2024-09-01 14:50:48,589][00194] Heartbeat connected on RolloutWorker_w4 +[2024-09-01 14:50:48,663][03040] Decorrelating experience for 64 frames... +[2024-09-01 14:50:50,136][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 104.9. Samples: 1574. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 14:50:50,140][00194] Avg episode reward: [(0, '2.296')] +[2024-09-01 14:50:52,661][03035] Decorrelating experience for 64 frames... +[2024-09-01 14:50:53,342][03021] Signal inference workers to stop experience collection... +[2024-09-01 14:50:53,395][03034] InferenceWorker_p0-w0: stopping experience collection +[2024-09-01 14:50:53,479][03040] Decorrelating experience for 96 frames... +[2024-09-01 14:50:53,575][00194] Heartbeat connected on RolloutWorker_w5 +[2024-09-01 14:50:53,977][03035] Decorrelating experience for 96 frames... +[2024-09-01 14:50:54,080][00194] Heartbeat connected on RolloutWorker_w0 +[2024-09-01 14:50:54,301][03021] Signal inference workers to resume experience collection... +[2024-09-01 14:50:54,302][03034] InferenceWorker_p0-w0: resuming experience collection +[2024-09-01 14:50:55,136][00194] Fps is (10 sec: 409.6, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 4096. Throughput: 0: 109.6. Samples: 2192. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-01 14:50:55,138][00194] Avg episode reward: [(0, '2.547')] +[2024-09-01 14:51:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 327.7, 300 sec: 327.7). Total num frames: 8192. Throughput: 0: 141.1. Samples: 3528. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-01 14:51:00,142][00194] Avg episode reward: [(0, '3.212')] +[2024-09-01 14:51:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 12288. Throughput: 0: 164.7. Samples: 4940. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:51:05,139][00194] Avg episode reward: [(0, '3.274')] +[2024-09-01 14:51:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 468.1, 300 sec: 468.1). Total num frames: 16384. Throughput: 0: 159.8. Samples: 5592. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:51:10,141][00194] Avg episode reward: [(0, '3.525')] +[2024-09-01 14:51:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 512.0, 300 sec: 512.0). Total num frames: 20480. Throughput: 0: 174.2. Samples: 6970. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:51:15,144][00194] Avg episode reward: [(0, '3.818')] +[2024-09-01 14:51:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 24576. Throughput: 0: 192.3. Samples: 8654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:51:20,140][00194] Avg episode reward: [(0, '3.829')] +[2024-09-01 14:51:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 573.4, 300 sec: 573.4). Total num frames: 28672. Throughput: 0: 201.0. Samples: 9046. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:25,145][00194] Avg episode reward: [(0, '3.873')] +[2024-09-01 14:51:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 670.3, 300 sec: 670.3). Total num frames: 36864. Throughput: 0: 223.3. Samples: 10442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:30,138][00194] Avg episode reward: [(0, '3.971')] +[2024-09-01 14:51:34,366][03034] Updated weights for policy 0, policy_version 10 (0.2578) +[2024-09-01 14:51:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 682.7, 300 sec: 682.7). Total num frames: 40960. Throughput: 0: 229.3. Samples: 11892. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:35,139][00194] Avg episode reward: [(0, '4.138')] +[2024-09-01 14:51:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 693.2). Total num frames: 45056. Throughput: 0: 236.0. Samples: 12814. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:40,141][00194] Avg episode reward: [(0, '4.322')] +[2024-09-01 14:51:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 702.2). Total num frames: 49152. Throughput: 0: 228.5. Samples: 13812. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:45,139][00194] Avg episode reward: [(0, '4.340')] +[2024-09-01 14:51:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 710.0). Total num frames: 53248. Throughput: 0: 229.9. Samples: 15286. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:50,145][00194] Avg episode reward: [(0, '4.382')] +[2024-09-01 14:51:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 716.8). Total num frames: 57344. Throughput: 0: 235.4. Samples: 16184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:51:55,142][00194] Avg episode reward: [(0, '4.435')] +[2024-09-01 14:52:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 722.8). Total num frames: 61440. Throughput: 0: 232.4. Samples: 17426. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:00,143][00194] Avg episode reward: [(0, '4.432')] +[2024-09-01 14:52:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 728.2). Total num frames: 65536. Throughput: 0: 229.6. Samples: 18986. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:05,147][00194] Avg episode reward: [(0, '4.425')] +[2024-09-01 14:52:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 776.1). Total num frames: 73728. Throughput: 0: 236.8. Samples: 19700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:10,138][00194] Avg episode reward: [(0, '4.491')] +[2024-09-01 14:52:15,137][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 778.2). Total num frames: 77824. Throughput: 0: 235.0. Samples: 21018. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:52:15,140][00194] Avg episode reward: [(0, '4.481')] +[2024-09-01 14:52:19,754][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000020_81920.pth... +[2024-09-01 14:52:19,758][03034] Updated weights for policy 0, policy_version 20 (0.0527) +[2024-09-01 14:52:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 780.2). Total num frames: 81920. Throughput: 0: 225.6. Samples: 22042. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:52:20,138][00194] Avg episode reward: [(0, '4.519')] +[2024-09-01 14:52:25,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 782.0). Total num frames: 86016. Throughput: 0: 227.3. Samples: 23044. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:25,139][00194] Avg episode reward: [(0, '4.486')] +[2024-09-01 14:52:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 783.6). Total num frames: 90112. Throughput: 0: 240.9. Samples: 24652. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:30,141][00194] Avg episode reward: [(0, '4.496')] +[2024-09-01 14:52:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 785.1). Total num frames: 94208. Throughput: 0: 233.6. Samples: 25800. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:52:35,141][00194] Avg episode reward: [(0, '4.528')] +[2024-09-01 14:52:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 786.4). Total num frames: 98304. Throughput: 0: 223.5. Samples: 26242. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:52:40,145][00194] Avg episode reward: [(0, '4.456')] +[2024-09-01 14:52:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 106496. Throughput: 0: 238.5. Samples: 28160. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:45,151][00194] Avg episode reward: [(0, '4.433')] +[2024-09-01 14:52:49,237][03021] Saving new best policy, reward=4.433! +[2024-09-01 14:52:50,137][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 110592. Throughput: 0: 228.7. Samples: 29280. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:50,145][00194] Avg episode reward: [(0, '4.521')] +[2024-09-01 14:52:54,932][03021] Saving new best policy, reward=4.521! +[2024-09-01 14:52:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 114688. Throughput: 0: 228.9. Samples: 30000. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:52:55,143][00194] Avg episode reward: [(0, '4.529')] +[2024-09-01 14:52:58,673][03021] Saving new best policy, reward=4.529! +[2024-09-01 14:53:00,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 118784. Throughput: 0: 227.0. Samples: 31234. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:53:00,138][00194] Avg episode reward: [(0, '4.515')] +[2024-09-01 14:53:02,571][03034] Updated weights for policy 0, policy_version 30 (0.0582) +[2024-09-01 14:53:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 122880. Throughput: 0: 246.7. Samples: 33144. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:53:05,141][00194] Avg episode reward: [(0, '4.548')] +[2024-09-01 14:53:10,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 126976. Throughput: 0: 231.1. Samples: 33446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:53:10,146][00194] Avg episode reward: [(0, '4.506')] +[2024-09-01 14:53:12,627][03021] Saving new best policy, reward=4.548! +[2024-09-01 14:53:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 131072. Throughput: 0: 222.7. Samples: 34672. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:53:15,144][00194] Avg episode reward: [(0, '4.561')] +[2024-09-01 14:53:20,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 135168. Throughput: 0: 236.1. Samples: 36424. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:53:20,143][00194] Avg episode reward: [(0, '4.552')] +[2024-09-01 14:53:20,380][03021] Saving new best policy, reward=4.561! +[2024-09-01 14:53:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 843.3). Total num frames: 143360. Throughput: 0: 247.9. Samples: 37396. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:53:25,139][00194] Avg episode reward: [(0, '4.575')] +[2024-09-01 14:53:30,140][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 143360. Throughput: 0: 212.9. Samples: 37740. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:53:30,158][00194] Avg episode reward: [(0, '4.604')] +[2024-09-01 14:53:33,783][03021] Saving new best policy, reward=4.575! +[2024-09-01 14:53:33,922][03021] Saving new best policy, reward=4.604! +[2024-09-01 14:53:35,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 147456. Throughput: 0: 210.5. Samples: 38752. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:53:35,142][00194] Avg episode reward: [(0, '4.503')] +[2024-09-01 14:53:40,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 151552. Throughput: 0: 211.1. Samples: 39500. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:53:40,139][00194] Avg episode reward: [(0, '4.544')] +[2024-09-01 14:53:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 155648. Throughput: 0: 215.8. Samples: 40944. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:53:45,146][00194] Avg episode reward: [(0, '4.456')] +[2024-09-01 14:53:50,142][00194] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 819.2). Total num frames: 159744. Throughput: 0: 196.4. Samples: 41982. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 14:53:50,146][00194] Avg episode reward: [(0, '4.465')] +[2024-09-01 14:53:52,315][03034] Updated weights for policy 0, policy_version 40 (0.1638) +[2024-09-01 14:53:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 163840. Throughput: 0: 204.9. Samples: 42668. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 14:53:55,143][00194] Avg episode reward: [(0, '4.601')] +[2024-09-01 14:54:00,136][00194] Fps is (10 sec: 1229.6, 60 sec: 887.5, 300 sec: 839.2). Total num frames: 172032. Throughput: 0: 220.0. Samples: 44574. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 14:54:00,142][00194] Avg episode reward: [(0, '4.532')] +[2024-09-01 14:54:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 172032. Throughput: 0: 203.4. Samples: 45576. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 14:54:05,138][00194] Avg episode reward: [(0, '4.487')] +[2024-09-01 14:54:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 838.3). Total num frames: 180224. Throughput: 0: 194.0. Samples: 46128. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:54:10,146][00194] Avg episode reward: [(0, '4.467')] +[2024-09-01 14:54:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 837.8). Total num frames: 184320. Throughput: 0: 219.1. Samples: 47598. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:54:15,143][00194] Avg episode reward: [(0, '4.396')] +[2024-09-01 14:54:17,607][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000046_188416.pth... +[2024-09-01 14:54:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 837.4). Total num frames: 188416. Throughput: 0: 231.2. Samples: 49154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:54:20,143][00194] Avg episode reward: [(0, '4.467')] +[2024-09-01 14:54:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 837.0). Total num frames: 192512. Throughput: 0: 223.1. Samples: 49540. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:54:25,144][00194] Avg episode reward: [(0, '4.449')] +[2024-09-01 14:54:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 836.6). Total num frames: 196608. Throughput: 0: 216.7. Samples: 50696. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:54:30,139][00194] Avg episode reward: [(0, '4.457')] +[2024-09-01 14:54:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 836.3). Total num frames: 200704. Throughput: 0: 236.7. Samples: 52630. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:54:35,139][00194] Avg episode reward: [(0, '4.421')] +[2024-09-01 14:54:36,014][03034] Updated weights for policy 0, policy_version 50 (0.1682) +[2024-09-01 14:54:40,142][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 835.9). Total num frames: 204800. Throughput: 0: 229.6. Samples: 53002. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:54:40,148][00194] Avg episode reward: [(0, '4.315')] +[2024-09-01 14:54:40,302][03021] Signal inference workers to stop experience collection... (50 times) +[2024-09-01 14:54:40,427][03034] InferenceWorker_p0-w0: stopping experience collection (50 times) +[2024-09-01 14:54:41,141][03021] Signal inference workers to resume experience collection... (50 times) +[2024-09-01 14:54:41,142][03034] InferenceWorker_p0-w0: resuming experience collection (50 times) +[2024-09-01 14:54:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 835.6). Total num frames: 208896. Throughput: 0: 215.8. Samples: 54286. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:54:45,141][00194] Avg episode reward: [(0, '4.328')] +[2024-09-01 14:54:50,136][00194] Fps is (10 sec: 1229.6, 60 sec: 955.8, 300 sec: 851.3). Total num frames: 217088. Throughput: 0: 225.6. Samples: 55728. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:54:50,140][00194] Avg episode reward: [(0, '4.300')] +[2024-09-01 14:54:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 850.7). Total num frames: 221184. Throughput: 0: 235.6. Samples: 56730. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:54:55,144][00194] Avg episode reward: [(0, '4.263')] +[2024-09-01 14:55:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 850.1). Total num frames: 225280. Throughput: 0: 225.9. Samples: 57762. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:00,140][00194] Avg episode reward: [(0, '4.302')] +[2024-09-01 14:55:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 849.5). Total num frames: 229376. Throughput: 0: 222.1. Samples: 59150. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:05,138][00194] Avg episode reward: [(0, '4.240')] +[2024-09-01 14:55:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 849.0). Total num frames: 233472. Throughput: 0: 229.4. Samples: 59864. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:10,142][00194] Avg episode reward: [(0, '4.327')] +[2024-09-01 14:55:15,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 848.5). Total num frames: 237568. Throughput: 0: 239.0. Samples: 61452. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:15,140][00194] Avg episode reward: [(0, '4.340')] +[2024-09-01 14:55:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.9). Total num frames: 241664. Throughput: 0: 220.1. Samples: 62536. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:20,139][00194] Avg episode reward: [(0, '4.356')] +[2024-09-01 14:55:21,561][03034] Updated weights for policy 0, policy_version 60 (0.0050) +[2024-09-01 14:55:25,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 847.4). Total num frames: 245760. Throughput: 0: 229.6. Samples: 63334. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:25,145][00194] Avg episode reward: [(0, '4.346')] +[2024-09-01 14:55:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 860.9). Total num frames: 253952. Throughput: 0: 236.7. Samples: 64938. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:30,139][00194] Avg episode reward: [(0, '4.353')] +[2024-09-01 14:55:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 258048. Throughput: 0: 228.0. Samples: 65990. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:35,139][00194] Avg episode reward: [(0, '4.448')] +[2024-09-01 14:55:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 888.6). Total num frames: 262144. Throughput: 0: 219.6. Samples: 66614. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:40,140][00194] Avg episode reward: [(0, '4.677')] +[2024-09-01 14:55:43,065][03021] Saving new best policy, reward=4.677! +[2024-09-01 14:55:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 266240. Throughput: 0: 233.9. Samples: 68286. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:45,145][00194] Avg episode reward: [(0, '4.605')] +[2024-09-01 14:55:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 270336. Throughput: 0: 230.7. Samples: 69530. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:55:50,140][00194] Avg episode reward: [(0, '4.674')] +[2024-09-01 14:55:55,141][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 274432. Throughput: 0: 227.6. Samples: 70106. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:55:55,149][00194] Avg episode reward: [(0, '4.661')] +[2024-09-01 14:56:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 278528. Throughput: 0: 226.2. Samples: 71630. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:00,144][00194] Avg episode reward: [(0, '4.700')] +[2024-09-01 14:56:04,897][03021] Saving new best policy, reward=4.700! +[2024-09-01 14:56:04,901][03034] Updated weights for policy 0, policy_version 70 (0.1759) +[2024-09-01 14:56:05,136][00194] Fps is (10 sec: 1229.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 286720. Throughput: 0: 237.5. Samples: 73224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:05,139][00194] Avg episode reward: [(0, '4.741')] +[2024-09-01 14:56:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 286720. Throughput: 0: 236.0. Samples: 73956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:10,139][00194] Avg episode reward: [(0, '4.723')] +[2024-09-01 14:56:10,600][03021] Saving new best policy, reward=4.741! +[2024-09-01 14:56:15,137][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 290816. Throughput: 0: 224.3. Samples: 75032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:15,147][00194] Avg episode reward: [(0, '4.737')] +[2024-09-01 14:56:19,195][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000073_299008.pth... +[2024-09-01 14:56:19,305][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000020_81920.pth +[2024-09-01 14:56:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 299008. Throughput: 0: 232.8. Samples: 76464. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:20,143][00194] Avg episode reward: [(0, '4.726')] +[2024-09-01 14:56:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 303104. Throughput: 0: 237.9. Samples: 77320. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:25,139][00194] Avg episode reward: [(0, '4.575')] +[2024-09-01 14:56:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 307200. Throughput: 0: 221.5. Samples: 78252. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:30,138][00194] Avg episode reward: [(0, '4.571')] +[2024-09-01 14:56:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 311296. Throughput: 0: 230.0. Samples: 79880. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:56:35,145][00194] Avg episode reward: [(0, '4.496')] +[2024-09-01 14:56:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 315392. Throughput: 0: 229.3. Samples: 80424. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 14:56:40,143][00194] Avg episode reward: [(0, '4.360')] +[2024-09-01 14:56:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 319488. Throughput: 0: 231.6. Samples: 82050. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 14:56:45,139][00194] Avg episode reward: [(0, '4.360')] +[2024-09-01 14:56:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 323584. Throughput: 0: 222.3. Samples: 83226. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:56:50,144][00194] Avg episode reward: [(0, '4.339')] +[2024-09-01 14:56:51,612][03034] Updated weights for policy 0, policy_version 80 (0.1042) +[2024-09-01 14:56:55,148][00194] Fps is (10 sec: 1227.4, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 331776. Throughput: 0: 218.5. Samples: 83792. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:56:55,153][00194] Avg episode reward: [(0, '4.330')] +[2024-09-01 14:57:00,139][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 335872. Throughput: 0: 232.3. Samples: 85486. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:00,146][00194] Avg episode reward: [(0, '4.269')] +[2024-09-01 14:57:05,136][00194] Fps is (10 sec: 820.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 339968. Throughput: 0: 222.7. Samples: 86486. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:05,143][00194] Avg episode reward: [(0, '4.248')] +[2024-09-01 14:57:10,136][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 344064. Throughput: 0: 219.2. Samples: 87186. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:10,142][00194] Avg episode reward: [(0, '4.262')] +[2024-09-01 14:57:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 348160. Throughput: 0: 232.4. Samples: 88712. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:57:15,138][00194] Avg episode reward: [(0, '4.427')] +[2024-09-01 14:57:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 352256. Throughput: 0: 228.6. Samples: 90166. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:57:20,140][00194] Avg episode reward: [(0, '4.391')] +[2024-09-01 14:57:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 356352. Throughput: 0: 225.1. Samples: 90554. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:25,139][00194] Avg episode reward: [(0, '4.470')] +[2024-09-01 14:57:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 360448. Throughput: 0: 225.0. Samples: 92176. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:30,144][00194] Avg episode reward: [(0, '4.480')] +[2024-09-01 14:57:34,731][03034] Updated weights for policy 0, policy_version 90 (0.1892) +[2024-09-01 14:57:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 368640. Throughput: 0: 232.0. Samples: 93668. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:35,139][00194] Avg episode reward: [(0, '4.562')] +[2024-09-01 14:57:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 368640. Throughput: 0: 234.6. Samples: 94344. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:57:40,140][00194] Avg episode reward: [(0, '4.613')] +[2024-09-01 14:57:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 376832. Throughput: 0: 225.3. Samples: 95622. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:57:45,143][00194] Avg episode reward: [(0, '4.710')] +[2024-09-01 14:57:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 380928. Throughput: 0: 234.0. Samples: 97016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:57:50,139][00194] Avg episode reward: [(0, '4.616')] +[2024-09-01 14:57:55,140][00194] Fps is (10 sec: 818.9, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 385024. Throughput: 0: 234.4. Samples: 97734. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:57:55,144][00194] Avg episode reward: [(0, '4.667')] +[2024-09-01 14:58:00,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 389120. Throughput: 0: 224.6. Samples: 98818. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:58:00,141][00194] Avg episode reward: [(0, '4.595')] +[2024-09-01 14:58:05,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 393216. Throughput: 0: 231.3. Samples: 100574. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:58:05,142][00194] Avg episode reward: [(0, '4.618')] +[2024-09-01 14:58:10,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 397312. Throughput: 0: 234.4. Samples: 101100. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:58:10,145][00194] Avg episode reward: [(0, '4.595')] +[2024-09-01 14:58:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 401408. Throughput: 0: 234.8. Samples: 102744. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:58:15,141][00194] Avg episode reward: [(0, '4.580')] +[2024-09-01 14:58:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 405504. Throughput: 0: 226.3. Samples: 103850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:58:20,144][00194] Avg episode reward: [(0, '4.550')] +[2024-09-01 14:58:20,574][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000100_409600.pth... +[2024-09-01 14:58:20,580][03034] Updated weights for policy 0, policy_version 100 (0.1151) +[2024-09-01 14:58:20,680][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000046_188416.pth +[2024-09-01 14:58:22,878][03021] Signal inference workers to stop experience collection... (100 times) +[2024-09-01 14:58:22,935][03034] InferenceWorker_p0-w0: stopping experience collection (100 times) +[2024-09-01 14:58:24,391][03021] Signal inference workers to resume experience collection... (100 times) +[2024-09-01 14:58:24,392][03034] InferenceWorker_p0-w0: resuming experience collection (100 times) +[2024-09-01 14:58:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 413696. Throughput: 0: 230.1. Samples: 104698. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:58:25,141][00194] Avg episode reward: [(0, '4.563')] +[2024-09-01 14:58:30,141][00194] Fps is (10 sec: 1228.1, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 417792. Throughput: 0: 230.9. Samples: 106016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:58:30,152][00194] Avg episode reward: [(0, '4.566')] +[2024-09-01 14:58:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 421888. Throughput: 0: 225.6. Samples: 107168. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:58:35,139][00194] Avg episode reward: [(0, '4.687')] +[2024-09-01 14:58:40,136][00194] Fps is (10 sec: 819.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 425984. Throughput: 0: 228.2. Samples: 108000. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 14:58:40,143][00194] Avg episode reward: [(0, '4.638')] +[2024-09-01 14:58:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 430080. Throughput: 0: 237.9. Samples: 109524. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:58:45,139][00194] Avg episode reward: [(0, '4.693')] +[2024-09-01 14:58:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 434176. Throughput: 0: 230.9. Samples: 110966. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:58:50,141][00194] Avg episode reward: [(0, '4.693')] +[2024-09-01 14:58:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 438272. Throughput: 0: 227.2. Samples: 111322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:58:55,143][00194] Avg episode reward: [(0, '4.664')] +[2024-09-01 14:59:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 442368. Throughput: 0: 222.4. Samples: 112750. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:00,146][00194] Avg episode reward: [(0, '4.659')] +[2024-09-01 14:59:04,236][03034] Updated weights for policy 0, policy_version 110 (0.1529) +[2024-09-01 14:59:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 450560. Throughput: 0: 232.6. Samples: 114318. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:05,140][00194] Avg episode reward: [(0, '4.645')] +[2024-09-01 14:59:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 454656. Throughput: 0: 231.7. Samples: 115126. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:10,140][00194] Avg episode reward: [(0, '4.667')] +[2024-09-01 14:59:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 458752. Throughput: 0: 227.5. Samples: 116250. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:15,140][00194] Avg episode reward: [(0, '4.686')] +[2024-09-01 14:59:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 462848. Throughput: 0: 235.5. Samples: 117764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:20,139][00194] Avg episode reward: [(0, '4.675')] +[2024-09-01 14:59:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 466944. Throughput: 0: 229.6. Samples: 118330. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:25,146][00194] Avg episode reward: [(0, '4.773')] +[2024-09-01 14:59:28,465][03021] Saving new best policy, reward=4.773! +[2024-09-01 14:59:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 471040. Throughput: 0: 218.7. Samples: 119364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:30,142][00194] Avg episode reward: [(0, '4.739')] +[2024-09-01 14:59:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 475136. Throughput: 0: 220.4. Samples: 120884. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:35,143][00194] Avg episode reward: [(0, '4.641')] +[2024-09-01 14:59:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 479232. Throughput: 0: 228.1. Samples: 121586. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:40,141][00194] Avg episode reward: [(0, '4.693')] +[2024-09-01 14:59:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 483328. Throughput: 0: 229.9. Samples: 123094. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:45,139][00194] Avg episode reward: [(0, '4.684')] +[2024-09-01 14:59:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 487424. Throughput: 0: 219.0. Samples: 124172. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 14:59:50,143][00194] Avg episode reward: [(0, '4.749')] +[2024-09-01 14:59:51,140][03034] Updated weights for policy 0, policy_version 120 (0.1018) +[2024-09-01 14:59:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 495616. Throughput: 0: 218.4. Samples: 124956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 14:59:55,138][00194] Avg episode reward: [(0, '4.674')] +[2024-09-01 15:00:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 499712. Throughput: 0: 224.7. Samples: 126362. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:00:00,142][00194] Avg episode reward: [(0, '4.494')] +[2024-09-01 15:00:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 503808. Throughput: 0: 216.9. Samples: 127524. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:05,140][00194] Avg episode reward: [(0, '4.494')] +[2024-09-01 15:00:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 507904. Throughput: 0: 221.1. Samples: 128280. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:10,139][00194] Avg episode reward: [(0, '4.457')] +[2024-09-01 15:00:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 512000. Throughput: 0: 231.3. Samples: 129772. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:15,138][00194] Avg episode reward: [(0, '4.464')] +[2024-09-01 15:00:20,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 516096. Throughput: 0: 232.2. Samples: 131334. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:20,145][00194] Avg episode reward: [(0, '4.520')] +[2024-09-01 15:00:21,930][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000127_520192.pth... +[2024-09-01 15:00:22,073][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000073_299008.pth +[2024-09-01 15:00:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 520192. Throughput: 0: 223.6. Samples: 131648. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:25,139][00194] Avg episode reward: [(0, '4.569')] +[2024-09-01 15:00:30,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 524288. Throughput: 0: 225.9. Samples: 133258. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:30,141][00194] Avg episode reward: [(0, '4.595')] +[2024-09-01 15:00:34,393][03034] Updated weights for policy 0, policy_version 130 (0.0539) +[2024-09-01 15:00:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 532480. Throughput: 0: 232.9. Samples: 134654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:35,138][00194] Avg episode reward: [(0, '4.611')] +[2024-09-01 15:00:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 536576. Throughput: 0: 233.0. Samples: 135442. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:40,140][00194] Avg episode reward: [(0, '4.575')] +[2024-09-01 15:00:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 540672. Throughput: 0: 227.2. Samples: 136588. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:45,144][00194] Avg episode reward: [(0, '4.644')] +[2024-09-01 15:00:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 544768. Throughput: 0: 234.0. Samples: 138056. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:50,138][00194] Avg episode reward: [(0, '4.662')] +[2024-09-01 15:00:55,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 548864. Throughput: 0: 233.1. Samples: 138768. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:00:55,144][00194] Avg episode reward: [(0, '4.767')] +[2024-09-01 15:01:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 552960. Throughput: 0: 222.2. Samples: 139772. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:00,142][00194] Avg episode reward: [(0, '4.729')] +[2024-09-01 15:01:05,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 557056. Throughput: 0: 227.7. Samples: 141580. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:05,139][00194] Avg episode reward: [(0, '4.637')] +[2024-09-01 15:01:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 565248. Throughput: 0: 236.0. Samples: 142268. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:01:10,138][00194] Avg episode reward: [(0, '4.574')] +[2024-09-01 15:01:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 565248. Throughput: 0: 233.5. Samples: 143766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:01:15,139][00194] Avg episode reward: [(0, '4.515')] +[2024-09-01 15:01:20,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 569344. Throughput: 0: 225.2. Samples: 144786. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:20,144][00194] Avg episode reward: [(0, '4.487')] +[2024-09-01 15:01:20,351][03034] Updated weights for policy 0, policy_version 140 (0.1177) +[2024-09-01 15:01:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 577536. Throughput: 0: 229.0. Samples: 145746. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:25,144][00194] Avg episode reward: [(0, '4.569')] +[2024-09-01 15:01:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 581632. Throughput: 0: 235.0. Samples: 147164. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:30,143][00194] Avg episode reward: [(0, '4.565')] +[2024-09-01 15:01:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 585728. Throughput: 0: 226.4. Samples: 148242. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:35,147][00194] Avg episode reward: [(0, '4.511')] +[2024-09-01 15:01:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 589824. Throughput: 0: 225.3. Samples: 148904. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:40,141][00194] Avg episode reward: [(0, '4.510')] +[2024-09-01 15:01:45,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 593920. Throughput: 0: 238.1. Samples: 150488. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:45,142][00194] Avg episode reward: [(0, '4.465')] +[2024-09-01 15:01:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 598016. Throughput: 0: 230.5. Samples: 151954. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:50,142][00194] Avg episode reward: [(0, '4.498')] +[2024-09-01 15:01:55,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 602112. Throughput: 0: 223.6. Samples: 152328. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:01:55,139][00194] Avg episode reward: [(0, '4.447')] +[2024-09-01 15:02:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 610304. Throughput: 0: 227.8. Samples: 154016. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:00,139][00194] Avg episode reward: [(0, '4.431')] +[2024-09-01 15:02:03,616][03034] Updated weights for policy 0, policy_version 150 (0.1017) +[2024-09-01 15:02:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 614400. Throughput: 0: 238.2. Samples: 155506. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:05,138][00194] Avg episode reward: [(0, '4.470')] +[2024-09-01 15:02:07,017][03021] Signal inference workers to stop experience collection... (150 times) +[2024-09-01 15:02:07,091][03034] InferenceWorker_p0-w0: stopping experience collection (150 times) +[2024-09-01 15:02:08,870][03021] Signal inference workers to resume experience collection... (150 times) +[2024-09-01 15:02:08,878][03034] InferenceWorker_p0-w0: resuming experience collection (150 times) +[2024-09-01 15:02:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 618496. Throughput: 0: 229.7. Samples: 156084. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:10,143][00194] Avg episode reward: [(0, '4.466')] +[2024-09-01 15:02:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 622592. Throughput: 0: 220.9. Samples: 157106. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:15,138][00194] Avg episode reward: [(0, '4.473')] +[2024-09-01 15:02:17,310][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000153_626688.pth... +[2024-09-01 15:02:17,393][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000100_409600.pth +[2024-09-01 15:02:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 626688. Throughput: 0: 238.5. Samples: 158976. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:02:20,139][00194] Avg episode reward: [(0, '4.478')] +[2024-09-01 15:02:25,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 630784. Throughput: 0: 235.4. Samples: 159500. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:02:25,143][00194] Avg episode reward: [(0, '4.544')] +[2024-09-01 15:02:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 634880. Throughput: 0: 223.4. Samples: 160542. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:30,144][00194] Avg episode reward: [(0, '4.618')] +[2024-09-01 15:02:35,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 638976. Throughput: 0: 229.2. Samples: 162266. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:35,138][00194] Avg episode reward: [(0, '4.712')] +[2024-09-01 15:02:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 647168. Throughput: 0: 239.9. Samples: 163124. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:02:40,143][00194] Avg episode reward: [(0, '4.754')] +[2024-09-01 15:02:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 651264. Throughput: 0: 227.9. Samples: 164272. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:02:45,140][00194] Avg episode reward: [(0, '4.803')] +[2024-09-01 15:02:49,529][03021] Saving new best policy, reward=4.803! +[2024-09-01 15:02:49,534][03034] Updated weights for policy 0, policy_version 160 (0.1163) +[2024-09-01 15:02:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 655360. Throughput: 0: 217.7. Samples: 165302. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:02:50,138][00194] Avg episode reward: [(0, '4.753')] +[2024-09-01 15:02:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 659456. Throughput: 0: 227.1. Samples: 166302. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:02:55,142][00194] Avg episode reward: [(0, '4.763')] +[2024-09-01 15:03:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 663552. Throughput: 0: 235.3. Samples: 167694. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:00,145][00194] Avg episode reward: [(0, '4.789')] +[2024-09-01 15:03:05,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 667648. Throughput: 0: 218.0. Samples: 168786. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:05,142][00194] Avg episode reward: [(0, '4.809')] +[2024-09-01 15:03:07,580][03021] Saving new best policy, reward=4.809! +[2024-09-01 15:03:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 671744. Throughput: 0: 219.8. Samples: 169390. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:10,146][00194] Avg episode reward: [(0, '4.767')] +[2024-09-01 15:03:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 675840. Throughput: 0: 239.8. Samples: 171332. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:15,140][00194] Avg episode reward: [(0, '4.704')] +[2024-09-01 15:03:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 679936. Throughput: 0: 227.6. Samples: 172508. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:20,141][00194] Avg episode reward: [(0, '4.710')] +[2024-09-01 15:03:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 684032. Throughput: 0: 217.0. Samples: 172890. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:25,145][00194] Avg episode reward: [(0, '4.687')] +[2024-09-01 15:03:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 692224. Throughput: 0: 228.2. Samples: 174542. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:03:30,144][00194] Avg episode reward: [(0, '4.635')] +[2024-09-01 15:03:33,067][03034] Updated weights for policy 0, policy_version 170 (0.0047) +[2024-09-01 15:03:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 696320. Throughput: 0: 237.3. Samples: 175980. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:03:35,138][00194] Avg episode reward: [(0, '4.679')] +[2024-09-01 15:03:40,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 696320. Throughput: 0: 224.8. Samples: 176420. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:03:40,147][00194] Avg episode reward: [(0, '4.692')] +[2024-09-01 15:03:45,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 700416. Throughput: 0: 204.2. Samples: 176884. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:03:45,140][00194] Avg episode reward: [(0, '4.706')] +[2024-09-01 15:03:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 704512. Throughput: 0: 215.0. Samples: 178462. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:03:50,138][00194] Avg episode reward: [(0, '4.624')] +[2024-09-01 15:03:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 712704. Throughput: 0: 215.8. Samples: 179100. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:03:55,145][00194] Avg episode reward: [(0, '4.634')] +[2024-09-01 15:04:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 712704. Throughput: 0: 204.0. Samples: 180512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:00,139][00194] Avg episode reward: [(0, '4.586')] +[2024-09-01 15:04:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 720896. Throughput: 0: 202.8. Samples: 181632. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:05,141][00194] Avg episode reward: [(0, '4.599')] +[2024-09-01 15:04:10,137][00194] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 724992. Throughput: 0: 218.0. Samples: 182700. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:10,142][00194] Avg episode reward: [(0, '4.503')] +[2024-09-01 15:04:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 729088. Throughput: 0: 209.5. Samples: 183970. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:15,142][00194] Avg episode reward: [(0, '4.625')] +[2024-09-01 15:04:18,125][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000179_733184.pth... +[2024-09-01 15:04:18,227][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000127_520192.pth +[2024-09-01 15:04:20,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 733184. Throughput: 0: 201.3. Samples: 185038. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:20,139][00194] Avg episode reward: [(0, '4.690')] +[2024-09-01 15:04:22,805][03034] Updated weights for policy 0, policy_version 180 (0.1020) +[2024-09-01 15:04:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 737280. Throughput: 0: 206.0. Samples: 185692. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:25,139][00194] Avg episode reward: [(0, '4.697')] +[2024-09-01 15:04:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 741376. Throughput: 0: 233.2. Samples: 187380. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:30,139][00194] Avg episode reward: [(0, '4.765')] +[2024-09-01 15:04:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 745472. Throughput: 0: 228.0. Samples: 188724. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:04:35,138][00194] Avg episode reward: [(0, '4.752')] +[2024-09-01 15:04:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 749568. Throughput: 0: 221.6. Samples: 189072. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:04:40,138][00194] Avg episode reward: [(0, '4.782')] +[2024-09-01 15:04:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 757760. Throughput: 0: 228.0. Samples: 190772. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:04:45,138][00194] Avg episode reward: [(0, '4.886')] +[2024-09-01 15:04:48,333][03021] Saving new best policy, reward=4.886! +[2024-09-01 15:04:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 761856. Throughput: 0: 234.7. Samples: 192194. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:04:50,142][00194] Avg episode reward: [(0, '4.834')] +[2024-09-01 15:04:55,139][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 765952. Throughput: 0: 226.4. Samples: 192890. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:04:55,144][00194] Avg episode reward: [(0, '4.818')] +[2024-09-01 15:05:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 770048. Throughput: 0: 222.3. Samples: 193974. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:05:00,139][00194] Avg episode reward: [(0, '4.824')] +[2024-09-01 15:05:05,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 774144. Throughput: 0: 239.6. Samples: 195820. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:05,143][00194] Avg episode reward: [(0, '4.840')] +[2024-09-01 15:05:06,480][03034] Updated weights for policy 0, policy_version 190 (0.1028) +[2024-09-01 15:05:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 778240. Throughput: 0: 234.8. Samples: 196256. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:10,146][00194] Avg episode reward: [(0, '4.802')] +[2024-09-01 15:05:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 782336. Throughput: 0: 222.0. Samples: 197368. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:15,138][00194] Avg episode reward: [(0, '4.795')] +[2024-09-01 15:05:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 786432. Throughput: 0: 231.2. Samples: 199130. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:20,138][00194] Avg episode reward: [(0, '4.893')] +[2024-09-01 15:05:24,273][03021] Saving new best policy, reward=4.893! +[2024-09-01 15:05:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 794624. Throughput: 0: 242.6. Samples: 199990. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:25,138][00194] Avg episode reward: [(0, '4.792')] +[2024-09-01 15:05:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 798720. Throughput: 0: 230.7. Samples: 201154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:05:30,145][00194] Avg episode reward: [(0, '4.761')] +[2024-09-01 15:05:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 802816. Throughput: 0: 222.8. Samples: 202222. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:05:35,139][00194] Avg episode reward: [(0, '4.781')] +[2024-09-01 15:05:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 806912. Throughput: 0: 229.4. Samples: 203212. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:40,138][00194] Avg episode reward: [(0, '4.901')] +[2024-09-01 15:05:42,114][03021] Saving new best policy, reward=4.901! +[2024-09-01 15:05:45,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 811008. Throughput: 0: 236.3. Samples: 204608. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:45,140][00194] Avg episode reward: [(0, '4.914')] +[2024-09-01 15:05:48,029][03021] Saving new best policy, reward=4.914! +[2024-09-01 15:05:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 815104. Throughput: 0: 218.4. Samples: 205650. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:50,139][00194] Avg episode reward: [(0, '4.971')] +[2024-09-01 15:05:52,732][03021] Saving new best policy, reward=4.971! +[2024-09-01 15:05:52,737][03034] Updated weights for policy 0, policy_version 200 (0.0549) +[2024-09-01 15:05:54,974][03021] Signal inference workers to stop experience collection... (200 times) +[2024-09-01 15:05:55,011][03034] InferenceWorker_p0-w0: stopping experience collection (200 times) +[2024-09-01 15:05:55,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 819200. Throughput: 0: 223.1. Samples: 206294. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:05:55,138][00194] Avg episode reward: [(0, '4.972')] +[2024-09-01 15:05:56,461][03021] Signal inference workers to resume experience collection... (200 times) +[2024-09-01 15:05:56,462][03034] InferenceWorker_p0-w0: resuming experience collection (200 times) +[2024-09-01 15:06:00,144][00194] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 823296. Throughput: 0: 240.2. Samples: 208180. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:00,150][00194] Avg episode reward: [(0, '4.914')] +[2024-09-01 15:06:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 827392. Throughput: 0: 227.9. Samples: 209384. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:05,139][00194] Avg episode reward: [(0, '4.894')] +[2024-09-01 15:06:10,136][00194] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 831488. Throughput: 0: 217.1. Samples: 209760. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:10,144][00194] Avg episode reward: [(0, '4.865')] +[2024-09-01 15:06:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 839680. Throughput: 0: 227.7. Samples: 211400. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:06:15,138][00194] Avg episode reward: [(0, '4.890')] +[2024-09-01 15:06:18,004][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000206_843776.pth... +[2024-09-01 15:06:18,115][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000153_626688.pth +[2024-09-01 15:06:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 843776. Throughput: 0: 237.2. Samples: 212898. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:06:20,141][00194] Avg episode reward: [(0, '4.935')] +[2024-09-01 15:06:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 847872. Throughput: 0: 226.6. Samples: 213408. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:25,141][00194] Avg episode reward: [(0, '4.932')] +[2024-09-01 15:06:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 851968. Throughput: 0: 220.2. Samples: 214516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:30,139][00194] Avg episode reward: [(0, '5.019')] +[2024-09-01 15:06:32,231][03021] Saving new best policy, reward=5.019! +[2024-09-01 15:06:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 856064. Throughput: 0: 241.8. Samples: 216530. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:35,139][00194] Avg episode reward: [(0, '4.896')] +[2024-09-01 15:06:36,152][03034] Updated weights for policy 0, policy_version 210 (0.1038) +[2024-09-01 15:06:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 860160. Throughput: 0: 237.0. Samples: 216960. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:06:40,138][00194] Avg episode reward: [(0, '5.017')] +[2024-09-01 15:06:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 864256. Throughput: 0: 219.9. Samples: 218074. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:06:45,138][00194] Avg episode reward: [(0, '5.050')] +[2024-09-01 15:06:50,137][03021] Saving new best policy, reward=5.050! +[2024-09-01 15:06:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 868352. Throughput: 0: 228.8. Samples: 219678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:06:50,148][00194] Avg episode reward: [(0, '5.099')] +[2024-09-01 15:06:54,115][03021] Saving new best policy, reward=5.099! +[2024-09-01 15:06:55,137][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 876544. Throughput: 0: 241.8. Samples: 220640. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:06:55,143][00194] Avg episode reward: [(0, '5.143')] +[2024-09-01 15:06:59,804][03021] Saving new best policy, reward=5.143! +[2024-09-01 15:07:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.9, 300 sec: 902.5). Total num frames: 880640. Throughput: 0: 230.2. Samples: 221760. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:07:00,143][00194] Avg episode reward: [(0, '5.173')] +[2024-09-01 15:07:04,754][03021] Saving new best policy, reward=5.173! +[2024-09-01 15:07:05,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 884736. Throughput: 0: 221.0. Samples: 222844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:07:05,139][00194] Avg episode reward: [(0, '5.234')] +[2024-09-01 15:07:08,565][03021] Saving new best policy, reward=5.234! +[2024-09-01 15:07:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 888832. Throughput: 0: 231.5. Samples: 223826. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:07:10,139][00194] Avg episode reward: [(0, '5.251')] +[2024-09-01 15:07:12,406][03021] Saving new best policy, reward=5.251! +[2024-09-01 15:07:15,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 892928. Throughput: 0: 236.7. Samples: 225168. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:07:15,144][00194] Avg episode reward: [(0, '5.313')] +[2024-09-01 15:07:18,300][03021] Saving new best policy, reward=5.313! +[2024-09-01 15:07:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 897024. Throughput: 0: 215.1. Samples: 226208. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:07:20,141][00194] Avg episode reward: [(0, '5.267')] +[2024-09-01 15:07:22,874][03034] Updated weights for policy 0, policy_version 220 (0.0056) +[2024-09-01 15:07:25,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 901120. Throughput: 0: 221.4. Samples: 226922. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:07:25,138][00194] Avg episode reward: [(0, '5.138')] +[2024-09-01 15:07:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 905216. Throughput: 0: 237.6. Samples: 228766. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:07:30,147][00194] Avg episode reward: [(0, '5.040')] +[2024-09-01 15:07:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 909312. Throughput: 0: 227.8. Samples: 229928. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:07:35,138][00194] Avg episode reward: [(0, '4.980')] +[2024-09-01 15:07:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 913408. Throughput: 0: 213.1. Samples: 230228. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:07:40,143][00194] Avg episode reward: [(0, '4.943')] +[2024-09-01 15:07:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 921600. Throughput: 0: 226.8. Samples: 231964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:07:45,143][00194] Avg episode reward: [(0, '5.038')] +[2024-09-01 15:07:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 925696. Throughput: 0: 232.8. Samples: 233320. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:07:50,142][00194] Avg episode reward: [(0, '5.004')] +[2024-09-01 15:07:55,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 929792. Throughput: 0: 225.4. Samples: 233968. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:07:55,144][00194] Avg episode reward: [(0, '5.034')] +[2024-09-01 15:08:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 933888. Throughput: 0: 220.1. Samples: 235070. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:08:00,138][00194] Avg episode reward: [(0, '5.008')] +[2024-09-01 15:08:05,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 937984. Throughput: 0: 240.2. Samples: 237016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:08:05,139][00194] Avg episode reward: [(0, '5.089')] +[2024-09-01 15:08:06,015][03034] Updated weights for policy 0, policy_version 230 (0.0049) +[2024-09-01 15:08:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 942080. Throughput: 0: 232.2. Samples: 237372. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:08:10,139][00194] Avg episode reward: [(0, '5.118')] +[2024-09-01 15:08:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 946176. Throughput: 0: 216.0. Samples: 238488. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:08:15,141][00194] Avg episode reward: [(0, '5.120')] +[2024-09-01 15:08:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 950272. Throughput: 0: 227.3. Samples: 240156. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:08:20,148][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000233_954368.pth... +[2024-09-01 15:08:20,144][00194] Avg episode reward: [(0, '5.153')] +[2024-09-01 15:08:20,244][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000179_733184.pth +[2024-09-01 15:08:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 958464. Throughput: 0: 243.2. Samples: 241170. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:08:25,145][00194] Avg episode reward: [(0, '5.289')] +[2024-09-01 15:08:30,139][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 962560. Throughput: 0: 227.2. Samples: 242190. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:08:30,141][00194] Avg episode reward: [(0, '5.218')] +[2024-09-01 15:08:35,139][00194] Fps is (10 sec: 819.0, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 966656. Throughput: 0: 223.0. Samples: 243356. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:08:35,141][00194] Avg episode reward: [(0, '5.357')] +[2024-09-01 15:08:38,012][03021] Saving new best policy, reward=5.357! +[2024-09-01 15:08:40,136][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 970752. Throughput: 0: 228.4. Samples: 244246. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:08:40,138][00194] Avg episode reward: [(0, '5.368')] +[2024-09-01 15:08:41,954][03021] Saving new best policy, reward=5.368! +[2024-09-01 15:08:45,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 974848. Throughput: 0: 234.8. Samples: 245638. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:08:45,142][00194] Avg episode reward: [(0, '5.387')] +[2024-09-01 15:08:47,650][03021] Saving new best policy, reward=5.387! +[2024-09-01 15:08:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 978944. Throughput: 0: 216.0. Samples: 246738. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:08:50,141][00194] Avg episode reward: [(0, '5.273')] +[2024-09-01 15:08:51,987][03034] Updated weights for policy 0, policy_version 240 (0.0550) +[2024-09-01 15:08:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 983040. Throughput: 0: 224.0. Samples: 247452. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:08:55,139][00194] Avg episode reward: [(0, '5.202')] +[2024-09-01 15:09:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 991232. Throughput: 0: 240.3. Samples: 249302. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:00,140][00194] Avg episode reward: [(0, '5.264')] +[2024-09-01 15:09:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 991232. Throughput: 0: 225.0. Samples: 250282. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:05,139][00194] Avg episode reward: [(0, '5.497')] +[2024-09-01 15:09:09,936][03021] Saving new best policy, reward=5.497! +[2024-09-01 15:09:10,137][00194] Fps is (10 sec: 819.1, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 999424. Throughput: 0: 215.1. Samples: 250848. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:10,141][00194] Avg episode reward: [(0, '5.322')] +[2024-09-01 15:09:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1003520. Throughput: 0: 225.7. Samples: 252344. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:15,138][00194] Avg episode reward: [(0, '5.286')] +[2024-09-01 15:09:20,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1007616. Throughput: 0: 232.5. Samples: 253816. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:20,141][00194] Avg episode reward: [(0, '5.171')] +[2024-09-01 15:09:25,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 1011712. Throughput: 0: 225.7. Samples: 254404. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:25,143][00194] Avg episode reward: [(0, '5.128')] +[2024-09-01 15:09:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1015808. Throughput: 0: 224.1. Samples: 255722. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:09:30,140][00194] Avg episode reward: [(0, '5.294')] +[2024-09-01 15:09:35,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1019904. Throughput: 0: 239.0. Samples: 257494. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:09:35,144][00194] Avg episode reward: [(0, '5.326')] +[2024-09-01 15:09:35,395][03034] Updated weights for policy 0, policy_version 250 (0.0061) +[2024-09-01 15:09:39,394][03021] Signal inference workers to stop experience collection... (250 times) +[2024-09-01 15:09:39,524][03034] InferenceWorker_p0-w0: stopping experience collection (250 times) +[2024-09-01 15:09:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1024000. Throughput: 0: 234.9. Samples: 258024. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:09:40,141][00194] Avg episode reward: [(0, '5.417')] +[2024-09-01 15:09:41,133][03021] Signal inference workers to resume experience collection... (250 times) +[2024-09-01 15:09:41,136][03034] InferenceWorker_p0-w0: resuming experience collection (250 times) +[2024-09-01 15:09:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1028096. Throughput: 0: 219.5. Samples: 259178. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:09:45,139][00194] Avg episode reward: [(0, '5.498')] +[2024-09-01 15:09:49,340][03021] Saving new best policy, reward=5.498! +[2024-09-01 15:09:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1036288. Throughput: 0: 230.7. Samples: 260664. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:09:50,139][00194] Avg episode reward: [(0, '5.447')] +[2024-09-01 15:09:55,137][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1040384. Throughput: 0: 238.4. Samples: 261574. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:09:55,153][00194] Avg episode reward: [(0, '5.496')] +[2024-09-01 15:10:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1044480. Throughput: 0: 227.6. Samples: 262586. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:10:00,139][00194] Avg episode reward: [(0, '5.549')] +[2024-09-01 15:10:03,716][03021] Saving new best policy, reward=5.549! +[2024-09-01 15:10:05,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1048576. Throughput: 0: 225.6. Samples: 263966. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:10:05,144][00194] Avg episode reward: [(0, '5.649')] +[2024-09-01 15:10:07,553][03021] Saving new best policy, reward=5.649! +[2024-09-01 15:10:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1052672. Throughput: 0: 227.4. Samples: 264638. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:10,146][00194] Avg episode reward: [(0, '5.691')] +[2024-09-01 15:10:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1056768. Throughput: 0: 228.5. Samples: 266004. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:15,139][00194] Avg episode reward: [(0, '5.710')] +[2024-09-01 15:10:17,000][03021] Saving new best policy, reward=5.691! +[2024-09-01 15:10:17,103][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000259_1060864.pth... +[2024-09-01 15:10:17,286][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000206_843776.pth +[2024-09-01 15:10:17,310][03021] Saving new best policy, reward=5.710! +[2024-09-01 15:10:20,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1060864. Throughput: 0: 217.1. Samples: 267262. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:20,139][00194] Avg episode reward: [(0, '5.703')] +[2024-09-01 15:10:21,943][03034] Updated weights for policy 0, policy_version 260 (0.0722) +[2024-09-01 15:10:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1064960. Throughput: 0: 219.8. Samples: 267914. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:10:25,139][00194] Avg episode reward: [(0, '5.700')] +[2024-09-01 15:10:30,136][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1073152. Throughput: 0: 234.6. Samples: 269734. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:30,139][00194] Avg episode reward: [(0, '5.906')] +[2024-09-01 15:10:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1073152. Throughput: 0: 224.4. Samples: 270760. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:35,138][00194] Avg episode reward: [(0, '5.824')] +[2024-09-01 15:10:35,459][03021] Saving new best policy, reward=5.906! +[2024-09-01 15:10:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1081344. Throughput: 0: 215.5. Samples: 271270. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:40,139][00194] Avg episode reward: [(0, '5.875')] +[2024-09-01 15:10:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1085440. Throughput: 0: 227.7. Samples: 272832. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:45,138][00194] Avg episode reward: [(0, '5.695')] +[2024-09-01 15:10:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1089536. Throughput: 0: 231.3. Samples: 274376. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:50,138][00194] Avg episode reward: [(0, '5.816')] +[2024-09-01 15:10:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1093632. Throughput: 0: 227.4. Samples: 274872. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:10:55,141][00194] Avg episode reward: [(0, '5.754')] +[2024-09-01 15:11:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1097728. Throughput: 0: 221.0. Samples: 275948. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:11:00,139][00194] Avg episode reward: [(0, '5.781')] +[2024-09-01 15:11:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1101824. Throughput: 0: 238.0. Samples: 277970. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:11:05,144][00194] Avg episode reward: [(0, '5.757')] +[2024-09-01 15:11:05,615][03034] Updated weights for policy 0, policy_version 270 (0.1032) +[2024-09-01 15:11:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1105920. Throughput: 0: 235.9. Samples: 278528. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:11:10,138][00194] Avg episode reward: [(0, '5.848')] +[2024-09-01 15:11:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1110016. Throughput: 0: 218.7. Samples: 279576. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:11:15,143][00194] Avg episode reward: [(0, '5.596')] +[2024-09-01 15:11:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1118208. Throughput: 0: 230.5. Samples: 281134. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:11:20,142][00194] Avg episode reward: [(0, '5.533')] +[2024-09-01 15:11:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1122304. Throughput: 0: 240.7. Samples: 282100. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:11:25,138][00194] Avg episode reward: [(0, '5.706')] +[2024-09-01 15:11:30,140][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 1126400. Throughput: 0: 229.2. Samples: 283146. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:11:30,152][00194] Avg episode reward: [(0, '5.779')] +[2024-09-01 15:11:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1130496. Throughput: 0: 225.8. Samples: 284536. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:11:35,139][00194] Avg episode reward: [(0, '5.730')] +[2024-09-01 15:11:40,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1134592. Throughput: 0: 230.4. Samples: 285242. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:11:40,144][00194] Avg episode reward: [(0, '5.756')] +[2024-09-01 15:11:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1138688. Throughput: 0: 244.0. Samples: 286930. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:11:45,138][00194] Avg episode reward: [(0, '5.751')] +[2024-09-01 15:11:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1142784. Throughput: 0: 222.8. Samples: 287998. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:11:50,142][00194] Avg episode reward: [(0, '5.934')] +[2024-09-01 15:11:51,522][03021] Saving new best policy, reward=5.934! +[2024-09-01 15:11:51,531][03034] Updated weights for policy 0, policy_version 280 (0.0527) +[2024-09-01 15:11:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1146880. Throughput: 0: 226.8. Samples: 288732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:11:55,141][00194] Avg episode reward: [(0, '6.167')] +[2024-09-01 15:11:59,094][03021] Saving new best policy, reward=6.167! +[2024-09-01 15:12:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1155072. Throughput: 0: 238.4. Samples: 290302. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:12:00,143][00194] Avg episode reward: [(0, '6.073')] +[2024-09-01 15:12:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1159168. Throughput: 0: 228.0. Samples: 291394. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:12:05,141][00194] Avg episode reward: [(0, '6.193')] +[2024-09-01 15:12:09,569][03021] Saving new best policy, reward=6.193! +[2024-09-01 15:12:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1163264. Throughput: 0: 222.5. Samples: 292112. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:12:10,142][00194] Avg episode reward: [(0, '6.283')] +[2024-09-01 15:12:13,418][03021] Saving new best policy, reward=6.283! +[2024-09-01 15:12:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1167360. Throughput: 0: 230.2. Samples: 293504. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:12:15,143][00194] Avg episode reward: [(0, '6.266')] +[2024-09-01 15:12:17,182][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000286_1171456.pth... +[2024-09-01 15:12:17,285][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000233_954368.pth +[2024-09-01 15:12:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1171456. Throughput: 0: 236.5. Samples: 295178. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:12:20,145][00194] Avg episode reward: [(0, '6.343')] +[2024-09-01 15:12:22,818][03021] Saving new best policy, reward=6.343! +[2024-09-01 15:12:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1175552. Throughput: 0: 228.8. Samples: 295536. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:12:25,148][00194] Avg episode reward: [(0, '6.317')] +[2024-09-01 15:12:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1179648. Throughput: 0: 222.4. Samples: 296940. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:12:30,139][00194] Avg episode reward: [(0, '6.348')] +[2024-09-01 15:12:34,929][03021] Saving new best policy, reward=6.348! +[2024-09-01 15:12:34,946][03034] Updated weights for policy 0, policy_version 290 (0.0073) +[2024-09-01 15:12:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1187840. Throughput: 0: 235.0. Samples: 298572. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:12:35,140][00194] Avg episode reward: [(0, '6.431')] +[2024-09-01 15:12:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1187840. Throughput: 0: 234.4. Samples: 299280. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:12:40,139][00194] Avg episode reward: [(0, '6.319')] +[2024-09-01 15:12:40,529][03021] Saving new best policy, reward=6.431! +[2024-09-01 15:12:45,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1191936. Throughput: 0: 224.2. Samples: 300392. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:12:45,138][00194] Avg episode reward: [(0, '6.147')] +[2024-09-01 15:12:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1200128. Throughput: 0: 229.2. Samples: 301710. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:12:50,143][00194] Avg episode reward: [(0, '6.167')] +[2024-09-01 15:12:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1204224. Throughput: 0: 236.3. Samples: 302744. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:12:55,152][00194] Avg episode reward: [(0, '6.236')] +[2024-09-01 15:13:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1208320. Throughput: 0: 228.6. Samples: 303792. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:13:00,138][00194] Avg episode reward: [(0, '6.283')] +[2024-09-01 15:13:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1212416. Throughput: 0: 219.6. Samples: 305060. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:13:05,138][00194] Avg episode reward: [(0, '6.165')] +[2024-09-01 15:13:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1216512. Throughput: 0: 228.8. Samples: 305830. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:13:10,140][00194] Avg episode reward: [(0, '6.279')] +[2024-09-01 15:13:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1220608. Throughput: 0: 229.5. Samples: 307266. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:13:15,139][00194] Avg episode reward: [(0, '6.328')] +[2024-09-01 15:13:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1224704. Throughput: 0: 221.9. Samples: 308558. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:13:20,145][00194] Avg episode reward: [(0, '6.246')] +[2024-09-01 15:13:21,318][03034] Updated weights for policy 0, policy_version 300 (0.2803) +[2024-09-01 15:13:23,529][03021] Signal inference workers to stop experience collection... (300 times) +[2024-09-01 15:13:23,568][03034] InferenceWorker_p0-w0: stopping experience collection (300 times) +[2024-09-01 15:13:24,995][03021] Signal inference workers to resume experience collection... (300 times) +[2024-09-01 15:13:24,996][03034] InferenceWorker_p0-w0: resuming experience collection (300 times) +[2024-09-01 15:13:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1232896. Throughput: 0: 224.6. Samples: 309386. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:13:25,142][00194] Avg episode reward: [(0, '6.128')] +[2024-09-01 15:13:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1236992. Throughput: 0: 232.9. Samples: 310872. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:13:30,141][00194] Avg episode reward: [(0, '6.167')] +[2024-09-01 15:13:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1241088. Throughput: 0: 226.8. Samples: 311914. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:13:35,142][00194] Avg episode reward: [(0, '6.330')] +[2024-09-01 15:13:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1245184. Throughput: 0: 218.6. Samples: 312580. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:13:40,139][00194] Avg episode reward: [(0, '6.510')] +[2024-09-01 15:13:42,819][03021] Saving new best policy, reward=6.510! +[2024-09-01 15:13:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1249280. Throughput: 0: 231.6. Samples: 314216. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:13:45,142][00194] Avg episode reward: [(0, '6.641')] +[2024-09-01 15:13:50,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 1253376. Throughput: 0: 220.0. Samples: 314960. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:13:50,145][00194] Avg episode reward: [(0, '6.569')] +[2024-09-01 15:13:55,152][00194] Fps is (10 sec: 409.0, 60 sec: 819.0, 300 sec: 888.6). Total num frames: 1253376. Throughput: 0: 210.5. Samples: 315306. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:13:55,161][00194] Avg episode reward: [(0, '6.626')] +[2024-09-01 15:13:59,760][03021] Saving new best policy, reward=6.641! +[2024-09-01 15:14:00,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1257472. Throughput: 0: 190.1. Samples: 315820. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:14:00,142][00194] Avg episode reward: [(0, '6.549')] +[2024-09-01 15:14:05,136][00194] Fps is (10 sec: 410.2, 60 sec: 750.9, 300 sec: 874.7). Total num frames: 1257472. Throughput: 0: 184.2. Samples: 316846. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:14:05,142][00194] Avg episode reward: [(0, '6.598')] +[2024-09-01 15:14:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 1265664. Throughput: 0: 181.2. Samples: 317540. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:14:10,141][00194] Avg episode reward: [(0, '6.628')] +[2024-09-01 15:14:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 1269760. Throughput: 0: 178.1. Samples: 318886. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:15,145][03034] Updated weights for policy 0, policy_version 310 (0.3333) +[2024-09-01 15:14:15,146][00194] Avg episode reward: [(0, '6.511')] +[2024-09-01 15:14:19,878][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000311_1273856.pth... +[2024-09-01 15:14:19,988][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000259_1060864.pth +[2024-09-01 15:14:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 1273856. Throughput: 0: 177.3. Samples: 319894. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:20,144][00194] Avg episode reward: [(0, '6.553')] +[2024-09-01 15:14:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 888.6). Total num frames: 1277952. Throughput: 0: 185.9. Samples: 320944. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:25,139][00194] Avg episode reward: [(0, '6.714')] +[2024-09-01 15:14:27,506][03021] Saving new best policy, reward=6.714! +[2024-09-01 15:14:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 888.6). Total num frames: 1282048. Throughput: 0: 179.1. Samples: 322274. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:30,144][00194] Avg episode reward: [(0, '6.913')] +[2024-09-01 15:14:33,142][03021] Saving new best policy, reward=6.913! +[2024-09-01 15:14:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 888.6). Total num frames: 1286144. Throughput: 0: 185.7. Samples: 323316. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:35,139][00194] Avg episode reward: [(0, '6.828')] +[2024-09-01 15:14:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 888.6). Total num frames: 1290240. Throughput: 0: 193.5. Samples: 324012. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:40,139][00194] Avg episode reward: [(0, '6.677')] +[2024-09-01 15:14:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 874.7). Total num frames: 1294336. Throughput: 0: 218.5. Samples: 325652. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:14:45,145][00194] Avg episode reward: [(0, '6.578')] +[2024-09-01 15:14:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 874.7). Total num frames: 1298432. Throughput: 0: 225.8. Samples: 327008. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:14:50,141][00194] Avg episode reward: [(0, '6.610')] +[2024-09-01 15:14:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.4, 300 sec: 874.7). Total num frames: 1302528. Throughput: 0: 218.0. Samples: 327352. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:14:55,140][00194] Avg episode reward: [(0, '6.819')] +[2024-09-01 15:14:59,874][03034] Updated weights for policy 0, policy_version 320 (0.0546) +[2024-09-01 15:15:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1310720. Throughput: 0: 226.2. Samples: 329064. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:15:00,142][00194] Avg episode reward: [(0, '6.961')] +[2024-09-01 15:15:03,678][03021] Saving new best policy, reward=6.961! +[2024-09-01 15:15:05,140][00194] Fps is (10 sec: 1228.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1314816. Throughput: 0: 235.3. Samples: 330482. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 15:15:05,151][00194] Avg episode reward: [(0, '7.015')] +[2024-09-01 15:15:09,277][03021] Saving new best policy, reward=7.015! +[2024-09-01 15:15:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1318912. Throughput: 0: 227.0. Samples: 331158. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:15:10,145][00194] Avg episode reward: [(0, '7.072')] +[2024-09-01 15:15:14,145][03021] Saving new best policy, reward=7.072! +[2024-09-01 15:15:15,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1323008. Throughput: 0: 221.0. Samples: 332220. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:15:15,143][00194] Avg episode reward: [(0, '7.007')] +[2024-09-01 15:15:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1327104. Throughput: 0: 234.8. Samples: 333882. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:15:20,143][00194] Avg episode reward: [(0, '6.911')] +[2024-09-01 15:15:25,136][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1331200. Throughput: 0: 234.4. Samples: 334562. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:15:25,147][00194] Avg episode reward: [(0, '6.986')] +[2024-09-01 15:15:30,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1335296. Throughput: 0: 221.2. Samples: 335608. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:15:30,143][00194] Avg episode reward: [(0, '6.933')] +[2024-09-01 15:15:35,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1339392. Throughput: 0: 225.0. Samples: 337132. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:15:35,139][00194] Avg episode reward: [(0, '7.059')] +[2024-09-01 15:15:40,136][00194] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1347584. Throughput: 0: 237.3. Samples: 338032. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:15:40,142][00194] Avg episode reward: [(0, '7.376')] +[2024-09-01 15:15:45,089][03021] Saving new best policy, reward=7.376! +[2024-09-01 15:15:45,099][03034] Updated weights for policy 0, policy_version 330 (0.0529) +[2024-09-01 15:15:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1351680. Throughput: 0: 229.2. Samples: 339380. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:15:45,139][00194] Avg episode reward: [(0, '7.517')] +[2024-09-01 15:15:49,923][03021] Saving new best policy, reward=7.517! +[2024-09-01 15:15:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1355776. Throughput: 0: 220.7. Samples: 340412. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:15:50,139][00194] Avg episode reward: [(0, '7.502')] +[2024-09-01 15:15:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1359872. Throughput: 0: 229.2. Samples: 341472. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:15:55,139][00194] Avg episode reward: [(0, '7.252')] +[2024-09-01 15:16:00,139][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1363968. Throughput: 0: 236.3. Samples: 342856. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:16:00,147][00194] Avg episode reward: [(0, '7.138')] +[2024-09-01 15:16:05,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1368064. Throughput: 0: 219.8. Samples: 343772. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:16:05,144][00194] Avg episode reward: [(0, '7.174')] +[2024-09-01 15:16:10,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1372160. Throughput: 0: 224.0. Samples: 344644. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:16:10,142][00194] Avg episode reward: [(0, '7.181')] +[2024-09-01 15:16:15,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1376256. Throughput: 0: 241.7. Samples: 346482. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:16:15,144][00194] Avg episode reward: [(0, '7.125')] +[2024-09-01 15:16:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1380352. Throughput: 0: 233.2. Samples: 347628. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:16:20,139][00194] Avg episode reward: [(0, '7.162')] +[2024-09-01 15:16:21,157][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000338_1384448.pth... +[2024-09-01 15:16:21,298][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000286_1171456.pth +[2024-09-01 15:16:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 1384448. Throughput: 0: 224.4. Samples: 348128. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:16:25,146][00194] Avg episode reward: [(0, '7.133')] +[2024-09-01 15:16:29,921][03034] Updated weights for policy 0, policy_version 340 (0.1170) +[2024-09-01 15:16:30,137][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1392640. Throughput: 0: 232.4. Samples: 349840. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:16:30,139][00194] Avg episode reward: [(0, '7.302')] +[2024-09-01 15:16:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1396736. Throughput: 0: 234.5. Samples: 350966. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:16:35,141][00194] Avg episode reward: [(0, '7.031')] +[2024-09-01 15:16:40,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1400832. Throughput: 0: 226.8. Samples: 351678. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:16:40,140][00194] Avg episode reward: [(0, '6.916')] +[2024-09-01 15:16:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1404928. Throughput: 0: 224.9. Samples: 352976. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:16:45,139][00194] Avg episode reward: [(0, '6.753')] +[2024-09-01 15:16:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1409024. Throughput: 0: 243.8. Samples: 354744. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:16:50,154][00194] Avg episode reward: [(0, '6.763')] +[2024-09-01 15:16:55,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1413120. Throughput: 0: 233.5. Samples: 355154. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:16:55,147][00194] Avg episode reward: [(0, '6.884')] +[2024-09-01 15:17:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1417216. Throughput: 0: 218.9. Samples: 356334. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:17:00,141][00194] Avg episode reward: [(0, '6.823')] +[2024-09-01 15:17:05,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1421312. Throughput: 0: 230.3. Samples: 357992. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:17:05,144][00194] Avg episode reward: [(0, '7.117')] +[2024-09-01 15:17:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1429504. Throughput: 0: 236.2. Samples: 358756. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:10,139][00194] Avg episode reward: [(0, '7.097')] +[2024-09-01 15:17:14,793][03034] Updated weights for policy 0, policy_version 350 (0.1582) +[2024-09-01 15:17:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1433600. Throughput: 0: 225.8. Samples: 360000. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:15,139][00194] Avg episode reward: [(0, '7.085')] +[2024-09-01 15:17:18,875][03021] Signal inference workers to stop experience collection... (350 times) +[2024-09-01 15:17:18,910][03034] InferenceWorker_p0-w0: stopping experience collection (350 times) +[2024-09-01 15:17:19,857][03021] Signal inference workers to resume experience collection... (350 times) +[2024-09-01 15:17:19,859][03034] InferenceWorker_p0-w0: resuming experience collection (350 times) +[2024-09-01 15:17:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1437696. Throughput: 0: 224.4. Samples: 361062. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:20,145][00194] Avg episode reward: [(0, '7.128')] +[2024-09-01 15:17:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1441792. Throughput: 0: 229.5. Samples: 362004. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:25,143][00194] Avg episode reward: [(0, '7.602')] +[2024-09-01 15:17:27,477][03021] Saving new best policy, reward=7.602! +[2024-09-01 15:17:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1445888. Throughput: 0: 236.4. Samples: 363612. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:30,138][00194] Avg episode reward: [(0, '7.554')] +[2024-09-01 15:17:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1449984. Throughput: 0: 219.3. Samples: 364612. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:35,139][00194] Avg episode reward: [(0, '7.587')] +[2024-09-01 15:17:40,136][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1454080. Throughput: 0: 222.6. Samples: 365170. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:40,139][00194] Avg episode reward: [(0, '7.781')] +[2024-09-01 15:17:41,646][03021] Saving new best policy, reward=7.781! +[2024-09-01 15:17:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1458176. Throughput: 0: 239.3. Samples: 367104. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:45,146][00194] Avg episode reward: [(0, '7.998')] +[2024-09-01 15:17:50,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1462272. Throughput: 0: 228.2. Samples: 368260. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:17:50,139][00194] Avg episode reward: [(0, '8.053')] +[2024-09-01 15:17:50,553][03021] Saving new best policy, reward=7.998! +[2024-09-01 15:17:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1466368. Throughput: 0: 220.9. Samples: 368696. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:17:55,140][00194] Avg episode reward: [(0, '8.182')] +[2024-09-01 15:17:55,831][03021] Saving new best policy, reward=8.053! +[2024-09-01 15:17:59,646][03021] Saving new best policy, reward=8.182! +[2024-09-01 15:17:59,666][03034] Updated weights for policy 0, policy_version 360 (0.1053) +[2024-09-01 15:18:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1474560. Throughput: 0: 230.3. Samples: 370362. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:00,139][00194] Avg episode reward: [(0, '8.015')] +[2024-09-01 15:18:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1478656. Throughput: 0: 233.6. Samples: 371574. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:05,138][00194] Avg episode reward: [(0, '8.058')] +[2024-09-01 15:18:10,142][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1482752. Throughput: 0: 228.5. Samples: 372288. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:18:10,151][00194] Avg episode reward: [(0, '8.125')] +[2024-09-01 15:18:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1486848. Throughput: 0: 216.3. Samples: 373346. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:18:15,140][00194] Avg episode reward: [(0, '7.960')] +[2024-09-01 15:18:17,522][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000364_1490944.pth... +[2024-09-01 15:18:17,625][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000311_1273856.pth +[2024-09-01 15:18:20,136][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1490944. Throughput: 0: 236.2. Samples: 375240. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:20,147][00194] Avg episode reward: [(0, '7.430')] +[2024-09-01 15:18:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1495040. Throughput: 0: 233.9. Samples: 375696. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:25,141][00194] Avg episode reward: [(0, '7.439')] +[2024-09-01 15:18:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1499136. Throughput: 0: 218.5. Samples: 376938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:30,139][00194] Avg episode reward: [(0, '7.559')] +[2024-09-01 15:18:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 1503232. Throughput: 0: 224.2. Samples: 378348. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:35,147][00194] Avg episode reward: [(0, '7.365')] +[2024-09-01 15:18:40,136][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1511424. Throughput: 0: 236.2. Samples: 379326. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:18:40,138][00194] Avg episode reward: [(0, '7.387')] +[2024-09-01 15:18:44,260][03034] Updated weights for policy 0, policy_version 370 (0.1036) +[2024-09-01 15:18:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 1515520. Throughput: 0: 225.3. Samples: 380500. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:18:45,140][00194] Avg episode reward: [(0, '7.382')] +[2024-09-01 15:18:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.6). Total num frames: 1519616. Throughput: 0: 223.1. Samples: 381612. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:18:50,139][00194] Avg episode reward: [(0, '7.451')] +[2024-09-01 15:18:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1523712. Throughput: 0: 226.3. Samples: 382472. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:18:55,139][00194] Avg episode reward: [(0, '7.643')] +[2024-09-01 15:19:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1527808. Throughput: 0: 241.7. Samples: 384222. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:19:00,139][00194] Avg episode reward: [(0, '7.557')] +[2024-09-01 15:19:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1531904. Throughput: 0: 221.1. Samples: 385188. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:19:05,141][00194] Avg episode reward: [(0, '7.454')] +[2024-09-01 15:19:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 1536000. Throughput: 0: 223.6. Samples: 385758. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:19:10,147][00194] Avg episode reward: [(0, '7.521')] +[2024-09-01 15:19:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1544192. Throughput: 0: 237.3. Samples: 387616. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:19:15,143][00194] Avg episode reward: [(0, '7.540')] +[2024-09-01 15:19:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1548288. Throughput: 0: 228.8. Samples: 388646. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:19:20,144][00194] Avg episode reward: [(0, '7.705')] +[2024-09-01 15:19:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1552384. Throughput: 0: 224.5. Samples: 389428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:19:25,139][00194] Avg episode reward: [(0, '7.571')] +[2024-09-01 15:19:28,646][03034] Updated weights for policy 0, policy_version 380 (0.0036) +[2024-09-01 15:19:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1556480. Throughput: 0: 227.3. Samples: 390730. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:19:30,139][00194] Avg episode reward: [(0, '7.616')] +[2024-09-01 15:19:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1560576. Throughput: 0: 241.3. Samples: 392472. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:19:35,139][00194] Avg episode reward: [(0, '7.622')] +[2024-09-01 15:19:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1564672. Throughput: 0: 228.7. Samples: 392762. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:19:40,146][00194] Avg episode reward: [(0, '7.711')] +[2024-09-01 15:19:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1568768. Throughput: 0: 221.4. Samples: 394184. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:19:45,144][00194] Avg episode reward: [(0, '7.633')] +[2024-09-01 15:19:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1576960. Throughput: 0: 236.8. Samples: 395842. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:19:50,147][00194] Avg episode reward: [(0, '7.947')] +[2024-09-01 15:19:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1576960. Throughput: 0: 238.4. Samples: 396488. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:19:55,143][00194] Avg episode reward: [(0, '8.099')] +[2024-09-01 15:20:00,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1581056. Throughput: 0: 222.7. Samples: 397638. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:00,141][00194] Avg episode reward: [(0, '8.349')] +[2024-09-01 15:20:04,372][03021] Saving new best policy, reward=8.349! +[2024-09-01 15:20:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1589248. Throughput: 0: 229.9. Samples: 398990. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:05,147][00194] Avg episode reward: [(0, '8.654')] +[2024-09-01 15:20:08,131][03021] Saving new best policy, reward=8.654! +[2024-09-01 15:20:10,139][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1593344. Throughput: 0: 232.7. Samples: 399902. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:10,171][00194] Avg episode reward: [(0, '8.769')] +[2024-09-01 15:20:13,538][03021] Saving new best policy, reward=8.769! +[2024-09-01 15:20:13,562][03034] Updated weights for policy 0, policy_version 390 (0.0562) +[2024-09-01 15:20:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1597440. Throughput: 0: 227.4. Samples: 400964. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:15,141][00194] Avg episode reward: [(0, '8.925')] +[2024-09-01 15:20:18,534][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000391_1601536.pth... +[2024-09-01 15:20:18,642][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000338_1384448.pth +[2024-09-01 15:20:18,660][03021] Saving new best policy, reward=8.925! +[2024-09-01 15:20:20,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1601536. Throughput: 0: 220.8. Samples: 402406. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:20,139][00194] Avg episode reward: [(0, '8.885')] +[2024-09-01 15:20:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1605632. Throughput: 0: 228.4. Samples: 403040. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:25,139][00194] Avg episode reward: [(0, '9.058')] +[2024-09-01 15:20:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1609728. Throughput: 0: 235.3. Samples: 404774. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:30,139][00194] Avg episode reward: [(0, '9.159')] +[2024-09-01 15:20:31,569][03021] Saving new best policy, reward=9.058! +[2024-09-01 15:20:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1613824. Throughput: 0: 221.2. Samples: 405798. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:35,144][00194] Avg episode reward: [(0, '9.114')] +[2024-09-01 15:20:36,747][03021] Saving new best policy, reward=9.159! +[2024-09-01 15:20:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1617920. Throughput: 0: 221.7. Samples: 406466. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:40,139][00194] Avg episode reward: [(0, '9.150')] +[2024-09-01 15:20:45,137][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1626112. Throughput: 0: 233.1. Samples: 408126. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:45,141][00194] Avg episode reward: [(0, '9.159')] +[2024-09-01 15:20:50,136][00194] Fps is (10 sec: 1228.9, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1630208. Throughput: 0: 227.6. Samples: 409234. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:50,143][00194] Avg episode reward: [(0, '9.159')] +[2024-09-01 15:20:55,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1634304. Throughput: 0: 222.7. Samples: 409924. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:20:55,138][00194] Avg episode reward: [(0, '8.673')] +[2024-09-01 15:20:58,503][03034] Updated weights for policy 0, policy_version 400 (0.1013) +[2024-09-01 15:21:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1638400. Throughput: 0: 228.9. Samples: 411264. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:21:00,139][00194] Avg episode reward: [(0, '8.707')] +[2024-09-01 15:21:00,841][03021] Signal inference workers to stop experience collection... (400 times) +[2024-09-01 15:21:00,894][03034] InferenceWorker_p0-w0: stopping experience collection (400 times) +[2024-09-01 15:21:01,769][03021] Signal inference workers to resume experience collection... (400 times) +[2024-09-01 15:21:01,770][03034] InferenceWorker_p0-w0: resuming experience collection (400 times) +[2024-09-01 15:21:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1642496. Throughput: 0: 236.0. Samples: 413024. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:21:05,144][00194] Avg episode reward: [(0, '8.509')] +[2024-09-01 15:21:10,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1646592. Throughput: 0: 229.0. Samples: 413346. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:21:10,145][00194] Avg episode reward: [(0, '8.860')] +[2024-09-01 15:21:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1650688. Throughput: 0: 225.6. Samples: 414924. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:21:15,145][00194] Avg episode reward: [(0, '8.898')] +[2024-09-01 15:21:20,136][00194] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1658880. Throughput: 0: 235.2. Samples: 416384. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:21:20,139][00194] Avg episode reward: [(0, '8.862')] +[2024-09-01 15:21:25,142][00194] Fps is (10 sec: 1228.0, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 1662976. Throughput: 0: 240.5. Samples: 417292. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:21:25,145][00194] Avg episode reward: [(0, '8.784')] +[2024-09-01 15:21:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1667072. Throughput: 0: 225.9. Samples: 418292. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:21:30,145][00194] Avg episode reward: [(0, '8.625')] +[2024-09-01 15:21:35,136][00194] Fps is (10 sec: 819.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1671168. Throughput: 0: 235.0. Samples: 419808. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:21:35,139][00194] Avg episode reward: [(0, '8.619')] +[2024-09-01 15:21:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1675264. Throughput: 0: 235.3. Samples: 420512. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:21:40,139][00194] Avg episode reward: [(0, '8.796')] +[2024-09-01 15:21:42,049][03034] Updated weights for policy 0, policy_version 410 (0.0512) +[2024-09-01 15:21:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1679360. Throughput: 0: 230.7. Samples: 421646. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:21:45,142][00194] Avg episode reward: [(0, '8.972')] +[2024-09-01 15:21:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1683456. Throughput: 0: 228.8. Samples: 423320. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:21:50,147][00194] Avg episode reward: [(0, '8.799')] +[2024-09-01 15:21:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1691648. Throughput: 0: 237.5. Samples: 424032. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:21:55,141][00194] Avg episode reward: [(0, '8.738')] +[2024-09-01 15:22:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1695744. Throughput: 0: 232.9. Samples: 425404. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:00,142][00194] Avg episode reward: [(0, '8.934')] +[2024-09-01 15:22:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1699840. Throughput: 0: 224.1. Samples: 426470. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:05,138][00194] Avg episode reward: [(0, '8.926')] +[2024-09-01 15:22:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 1703936. Throughput: 0: 227.1. Samples: 427512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:10,139][00194] Avg episode reward: [(0, '9.215')] +[2024-09-01 15:22:12,456][03021] Saving new best policy, reward=9.215! +[2024-09-01 15:22:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1708032. Throughput: 0: 236.5. Samples: 428934. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:15,138][00194] Avg episode reward: [(0, '9.419')] +[2024-09-01 15:22:17,425][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000418_1712128.pth... +[2024-09-01 15:22:17,549][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000364_1490944.pth +[2024-09-01 15:22:17,568][03021] Saving new best policy, reward=9.419! +[2024-09-01 15:22:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1712128. Throughput: 0: 225.7. Samples: 429964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:22:20,144][00194] Avg episode reward: [(0, '9.369')] +[2024-09-01 15:22:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 1716224. Throughput: 0: 221.6. Samples: 430486. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:22:25,139][00194] Avg episode reward: [(0, '9.617')] +[2024-09-01 15:22:26,655][03021] Saving new best policy, reward=9.617! +[2024-09-01 15:22:26,665][03034] Updated weights for policy 0, policy_version 420 (0.0538) +[2024-09-01 15:22:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1720320. Throughput: 0: 238.7. Samples: 432388. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:30,144][00194] Avg episode reward: [(0, '9.930')] +[2024-09-01 15:22:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1724416. Throughput: 0: 227.9. Samples: 433574. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:35,138][00194] Avg episode reward: [(0, '9.955')] +[2024-09-01 15:22:35,506][03021] Saving new best policy, reward=9.930! +[2024-09-01 15:22:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1728512. Throughput: 0: 222.9. Samples: 434062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:22:40,139][00194] Avg episode reward: [(0, '10.560')] +[2024-09-01 15:22:40,830][03021] Saving new best policy, reward=9.955! +[2024-09-01 15:22:44,663][03021] Saving new best policy, reward=10.560! +[2024-09-01 15:22:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1736704. Throughput: 0: 229.2. Samples: 435716. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:45,138][00194] Avg episode reward: [(0, '10.784')] +[2024-09-01 15:22:48,436][03021] Saving new best policy, reward=10.784! +[2024-09-01 15:22:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1740800. Throughput: 0: 238.2. Samples: 437188. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:22:50,142][00194] Avg episode reward: [(0, '10.897')] +[2024-09-01 15:22:53,734][03021] Saving new best policy, reward=10.897! +[2024-09-01 15:22:55,140][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 1744896. Throughput: 0: 227.2. Samples: 437736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:22:55,143][00194] Avg episode reward: [(0, '10.913')] +[2024-09-01 15:22:58,742][03021] Saving new best policy, reward=10.913! +[2024-09-01 15:23:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1748992. Throughput: 0: 218.8. Samples: 438778. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:23:00,138][00194] Avg episode reward: [(0, '11.191')] +[2024-09-01 15:23:02,560][03021] Saving new best policy, reward=11.191! +[2024-09-01 15:23:05,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1753088. Throughput: 0: 236.0. Samples: 440584. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:05,140][00194] Avg episode reward: [(0, '11.546')] +[2024-09-01 15:23:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1757184. Throughput: 0: 238.4. Samples: 441212. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:10,139][00194] Avg episode reward: [(0, '11.671')] +[2024-09-01 15:23:11,874][03021] Saving new best policy, reward=11.546! +[2024-09-01 15:23:11,903][03034] Updated weights for policy 0, policy_version 430 (0.0095) +[2024-09-01 15:23:11,973][03021] Saving new best policy, reward=11.671! +[2024-09-01 15:23:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1761280. Throughput: 0: 218.6. Samples: 442226. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:23:15,148][00194] Avg episode reward: [(0, '11.796')] +[2024-09-01 15:23:16,760][03021] Saving new best policy, reward=11.796! +[2024-09-01 15:23:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1765376. Throughput: 0: 229.0. Samples: 443878. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:23:20,142][00194] Avg episode reward: [(0, '11.817')] +[2024-09-01 15:23:24,269][03021] Saving new best policy, reward=11.817! +[2024-09-01 15:23:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1773568. Throughput: 0: 237.4. Samples: 444746. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:25,145][00194] Avg episode reward: [(0, '11.696')] +[2024-09-01 15:23:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 1777664. Throughput: 0: 228.0. Samples: 445978. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:30,138][00194] Avg episode reward: [(0, '11.624')] +[2024-09-01 15:23:35,137][00194] Fps is (10 sec: 819.1, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1781760. Throughput: 0: 218.2. Samples: 447008. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:35,144][00194] Avg episode reward: [(0, '11.782')] +[2024-09-01 15:23:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 1785856. Throughput: 0: 229.6. Samples: 448068. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:40,141][00194] Avg episode reward: [(0, '11.478')] +[2024-09-01 15:23:45,137][00194] Fps is (10 sec: 819.2, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 1789952. Throughput: 0: 236.1. Samples: 449402. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:23:45,143][00194] Avg episode reward: [(0, '11.481')] +[2024-09-01 15:23:50,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 1794048. Throughput: 0: 219.1. Samples: 450444. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:23:50,141][00194] Avg episode reward: [(0, '11.190')] +[2024-09-01 15:23:55,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1798144. Throughput: 0: 220.0. Samples: 451112. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:23:55,140][00194] Avg episode reward: [(0, '11.275')] +[2024-09-01 15:23:57,823][03034] Updated weights for policy 0, policy_version 440 (0.1042) +[2024-09-01 15:24:00,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1802240. Throughput: 0: 224.4. Samples: 452324. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:24:00,142][00194] Avg episode reward: [(0, '10.995')] +[2024-09-01 15:24:05,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1802240. Throughput: 0: 206.5. Samples: 453172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:24:05,139][00194] Avg episode reward: [(0, '10.939')] +[2024-09-01 15:24:10,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 1806336. Throughput: 0: 195.7. Samples: 453554. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:24:10,142][00194] Avg episode reward: [(0, '10.673')] +[2024-09-01 15:24:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 1810432. Throughput: 0: 201.9. Samples: 455062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:24:15,138][00194] Avg episode reward: [(0, '10.622')] +[2024-09-01 15:24:18,898][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000444_1818624.pth... +[2024-09-01 15:24:19,000][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000391_1601536.pth +[2024-09-01 15:24:20,136][00194] Fps is (10 sec: 1228.9, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1818624. Throughput: 0: 210.0. Samples: 456458. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:24:20,140][00194] Avg episode reward: [(0, '10.791')] +[2024-09-01 15:24:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1822720. Throughput: 0: 203.5. Samples: 457226. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:24:25,138][00194] Avg episode reward: [(0, '10.800')] +[2024-09-01 15:24:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1826816. Throughput: 0: 196.8. Samples: 458258. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:24:30,139][00194] Avg episode reward: [(0, '10.674')] +[2024-09-01 15:24:35,139][00194] Fps is (10 sec: 818.9, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1830912. Throughput: 0: 206.7. Samples: 459744. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:24:35,142][00194] Avg episode reward: [(0, '10.785')] +[2024-09-01 15:24:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1835008. Throughput: 0: 205.1. Samples: 460340. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:24:40,150][00194] Avg episode reward: [(0, '10.915')] +[2024-09-01 15:24:45,137][00194] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 1839104. Throughput: 0: 206.4. Samples: 461610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:24:45,144][00194] Avg episode reward: [(0, '10.841')] +[2024-09-01 15:24:47,754][03034] Updated weights for policy 0, policy_version 450 (0.1988) +[2024-09-01 15:24:50,104][03021] Signal inference workers to stop experience collection... (450 times) +[2024-09-01 15:24:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 1843200. Throughput: 0: 220.3. Samples: 463084. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:24:50,139][00194] Avg episode reward: [(0, '10.751')] +[2024-09-01 15:24:50,176][03034] InferenceWorker_p0-w0: stopping experience collection (450 times) +[2024-09-01 15:24:51,090][03021] Signal inference workers to resume experience collection... (450 times) +[2024-09-01 15:24:51,092][03034] InferenceWorker_p0-w0: resuming experience collection (450 times) +[2024-09-01 15:24:55,136][00194] Fps is (10 sec: 1228.9, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 1851392. Throughput: 0: 227.1. Samples: 463774. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:24:55,138][00194] Avg episode reward: [(0, '10.648')] +[2024-09-01 15:25:00,138][00194] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 1855488. Throughput: 0: 230.3. Samples: 465428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:25:00,141][00194] Avg episode reward: [(0, '10.846')] +[2024-09-01 15:25:05,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1855488. Throughput: 0: 221.8. Samples: 466438. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:25:05,143][00194] Avg episode reward: [(0, '11.166')] +[2024-09-01 15:25:10,136][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1863680. Throughput: 0: 225.9. Samples: 467392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:10,139][00194] Avg episode reward: [(0, '11.250')] +[2024-09-01 15:25:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1867776. Throughput: 0: 233.7. Samples: 468776. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:15,140][00194] Avg episode reward: [(0, '11.464')] +[2024-09-01 15:25:20,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 1871872. Throughput: 0: 223.7. Samples: 469810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:25:20,143][00194] Avg episode reward: [(0, '11.591')] +[2024-09-01 15:25:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1875968. Throughput: 0: 226.4. Samples: 470528. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:25:25,138][00194] Avg episode reward: [(0, '11.710')] +[2024-09-01 15:25:30,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1880064. Throughput: 0: 232.8. Samples: 472086. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:30,141][00194] Avg episode reward: [(0, '11.390')] +[2024-09-01 15:25:31,140][03034] Updated weights for policy 0, policy_version 460 (0.1982) +[2024-09-01 15:25:35,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1884160. Throughput: 0: 229.9. Samples: 473428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:35,139][00194] Avg episode reward: [(0, '11.091')] +[2024-09-01 15:25:40,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1888256. Throughput: 0: 225.8. Samples: 473934. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:40,144][00194] Avg episode reward: [(0, '11.271')] +[2024-09-01 15:25:45,136][00194] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1896448. Throughput: 0: 226.7. Samples: 475628. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:45,144][00194] Avg episode reward: [(0, '11.524')] +[2024-09-01 15:25:50,136][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1900544. Throughput: 0: 235.2. Samples: 477024. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:25:50,141][00194] Avg episode reward: [(0, '11.603')] +[2024-09-01 15:25:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1904640. Throughput: 0: 229.6. Samples: 477724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:25:55,143][00194] Avg episode reward: [(0, '11.826')] +[2024-09-01 15:25:58,989][03021] Saving new best policy, reward=11.826! +[2024-09-01 15:26:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1908736. Throughput: 0: 220.5. Samples: 478700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:26:00,138][00194] Avg episode reward: [(0, '11.704')] +[2024-09-01 15:26:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1912832. Throughput: 0: 236.1. Samples: 480434. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:26:05,139][00194] Avg episode reward: [(0, '11.518')] +[2024-09-01 15:26:10,142][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 1916928. Throughput: 0: 234.1. Samples: 481064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:26:10,145][00194] Avg episode reward: [(0, '11.552')] +[2024-09-01 15:26:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1921024. Throughput: 0: 221.6. Samples: 482058. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:26:15,139][00194] Avg episode reward: [(0, '11.207')] +[2024-09-01 15:26:17,400][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000470_1925120.pth... +[2024-09-01 15:26:17,404][03034] Updated weights for policy 0, policy_version 470 (0.0541) +[2024-09-01 15:26:17,508][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000418_1712128.pth +[2024-09-01 15:26:20,136][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1925120. Throughput: 0: 230.8. Samples: 483814. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:26:20,146][00194] Avg episode reward: [(0, '11.148')] +[2024-09-01 15:26:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1933312. Throughput: 0: 234.7. Samples: 484496. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:26:25,139][00194] Avg episode reward: [(0, '11.199')] +[2024-09-01 15:26:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1933312. Throughput: 0: 222.9. Samples: 485660. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:26:30,138][00194] Avg episode reward: [(0, '11.042')] +[2024-09-01 15:26:35,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1937408. Throughput: 0: 222.0. Samples: 487012. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:26:35,139][00194] Avg episode reward: [(0, '11.254')] +[2024-09-01 15:26:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1945600. Throughput: 0: 221.3. Samples: 487684. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:26:40,139][00194] Avg episode reward: [(0, '11.548')] +[2024-09-01 15:26:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 1949696. Throughput: 0: 231.0. Samples: 489094. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:26:45,138][00194] Avg episode reward: [(0, '11.684')] +[2024-09-01 15:26:50,136][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1953792. Throughput: 0: 215.8. Samples: 490144. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:26:50,146][00194] Avg episode reward: [(0, '11.510')] +[2024-09-01 15:26:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1957888. Throughput: 0: 223.5. Samples: 491122. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:26:55,139][00194] Avg episode reward: [(0, '12.261')] +[2024-09-01 15:26:57,382][03021] Saving new best policy, reward=12.261! +[2024-09-01 15:27:00,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1961984. Throughput: 0: 235.3. Samples: 492648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:00,150][00194] Avg episode reward: [(0, '11.878')] +[2024-09-01 15:27:01,620][03034] Updated weights for policy 0, policy_version 480 (0.1911) +[2024-09-01 15:27:05,141][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1966080. Throughput: 0: 224.6. Samples: 493920. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:05,143][00194] Avg episode reward: [(0, '12.115')] +[2024-09-01 15:27:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 1970176. Throughput: 0: 216.1. Samples: 494220. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:10,138][00194] Avg episode reward: [(0, '12.094')] +[2024-09-01 15:27:15,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1974272. Throughput: 0: 225.1. Samples: 495788. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:15,144][00194] Avg episode reward: [(0, '12.213')] +[2024-09-01 15:27:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1982464. Throughput: 0: 214.5. Samples: 496664. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:20,139][00194] Avg episode reward: [(0, '12.239')] +[2024-09-01 15:27:25,137][00194] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 1986560. Throughput: 0: 225.0. Samples: 497810. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:25,144][00194] Avg episode reward: [(0, '12.707')] +[2024-09-01 15:27:29,334][03021] Saving new best policy, reward=12.707! +[2024-09-01 15:27:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1990656. Throughput: 0: 224.4. Samples: 499190. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:27:30,138][00194] Avg episode reward: [(0, '12.989')] +[2024-09-01 15:27:33,268][03021] Saving new best policy, reward=12.989! +[2024-09-01 15:27:35,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1994752. Throughput: 0: 236.7. Samples: 500796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:27:35,142][00194] Avg episode reward: [(0, '13.343')] +[2024-09-01 15:27:37,268][03021] Saving new best policy, reward=13.343! +[2024-09-01 15:27:40,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1998848. Throughput: 0: 226.3. Samples: 501308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:27:40,142][00194] Avg episode reward: [(0, '13.433')] +[2024-09-01 15:27:43,086][03021] Saving new best policy, reward=13.433! +[2024-09-01 15:27:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2002944. Throughput: 0: 215.4. Samples: 502342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:27:45,139][00194] Avg episode reward: [(0, '13.881')] +[2024-09-01 15:27:47,477][03021] Saving new best policy, reward=13.881! +[2024-09-01 15:27:47,484][03034] Updated weights for policy 0, policy_version 490 (0.1174) +[2024-09-01 15:27:50,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2007040. Throughput: 0: 227.7. Samples: 504166. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:27:50,141][00194] Avg episode reward: [(0, '14.077')] +[2024-09-01 15:27:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2011136. Throughput: 0: 235.4. Samples: 504812. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:27:55,139][00194] Avg episode reward: [(0, '14.071')] +[2024-09-01 15:27:55,360][03021] Saving new best policy, reward=14.077! +[2024-09-01 15:28:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2015232. Throughput: 0: 229.2. Samples: 506100. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:28:00,139][00194] Avg episode reward: [(0, '14.053')] +[2024-09-01 15:28:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2019328. Throughput: 0: 238.3. Samples: 507386. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:05,139][00194] Avg episode reward: [(0, '13.822')] +[2024-09-01 15:28:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2027520. Throughput: 0: 231.3. Samples: 508216. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:28:10,138][00194] Avg episode reward: [(0, '13.810')] +[2024-09-01 15:28:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2031616. Throughput: 0: 227.8. Samples: 509440. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:28:15,139][00194] Avg episode reward: [(0, '14.038')] +[2024-09-01 15:28:19,560][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000497_2035712.pth... +[2024-09-01 15:28:19,678][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000444_1818624.pth +[2024-09-01 15:28:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2035712. Throughput: 0: 214.7. Samples: 510456. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:20,139][00194] Avg episode reward: [(0, '13.959')] +[2024-09-01 15:28:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2039808. Throughput: 0: 226.7. Samples: 511510. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:25,139][00194] Avg episode reward: [(0, '14.159')] +[2024-09-01 15:28:27,379][03021] Saving new best policy, reward=14.159! +[2024-09-01 15:28:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2043904. Throughput: 0: 237.9. Samples: 513046. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:30,145][00194] Avg episode reward: [(0, '13.950')] +[2024-09-01 15:28:31,832][03034] Updated weights for policy 0, policy_version 500 (0.0071) +[2024-09-01 15:28:35,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2048000. Throughput: 0: 224.8. Samples: 514282. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:35,140][00194] Avg episode reward: [(0, '13.903')] +[2024-09-01 15:28:35,815][03021] Signal inference workers to stop experience collection... (500 times) +[2024-09-01 15:28:35,865][03034] InferenceWorker_p0-w0: stopping experience collection (500 times) +[2024-09-01 15:28:37,609][03021] Signal inference workers to resume experience collection... (500 times) +[2024-09-01 15:28:37,610][03034] InferenceWorker_p0-w0: resuming experience collection (500 times) +[2024-09-01 15:28:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2052096. Throughput: 0: 217.3. Samples: 514592. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:28:40,139][00194] Avg episode reward: [(0, '14.125')] +[2024-09-01 15:28:45,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2056192. Throughput: 0: 230.9. Samples: 516492. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:28:45,141][00194] Avg episode reward: [(0, '13.500')] +[2024-09-01 15:28:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2064384. Throughput: 0: 228.4. Samples: 517662. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:50,143][00194] Avg episode reward: [(0, '13.957')] +[2024-09-01 15:28:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2064384. Throughput: 0: 224.3. Samples: 518310. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:28:55,138][00194] Avg episode reward: [(0, '13.801')] +[2024-09-01 15:29:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2072576. Throughput: 0: 227.5. Samples: 519678. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:29:00,139][00194] Avg episode reward: [(0, '13.794')] +[2024-09-01 15:29:05,136][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2076672. Throughput: 0: 241.3. Samples: 521314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:05,141][00194] Avg episode reward: [(0, '13.400')] +[2024-09-01 15:29:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2080768. Throughput: 0: 227.2. Samples: 521736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:10,141][00194] Avg episode reward: [(0, '13.401')] +[2024-09-01 15:29:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2084864. Throughput: 0: 215.9. Samples: 522762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:15,147][00194] Avg episode reward: [(0, '13.225')] +[2024-09-01 15:29:17,868][03034] Updated weights for policy 0, policy_version 510 (0.1507) +[2024-09-01 15:29:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2088960. Throughput: 0: 228.1. Samples: 524548. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:20,145][00194] Avg episode reward: [(0, '12.847')] +[2024-09-01 15:29:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2093056. Throughput: 0: 240.8. Samples: 525426. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:25,138][00194] Avg episode reward: [(0, '13.575')] +[2024-09-01 15:29:30,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2097152. Throughput: 0: 221.0. Samples: 526438. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:30,141][00194] Avg episode reward: [(0, '13.734')] +[2024-09-01 15:29:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2101248. Throughput: 0: 228.0. Samples: 527920. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:35,138][00194] Avg episode reward: [(0, '13.735')] +[2024-09-01 15:29:40,136][00194] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2109440. Throughput: 0: 235.6. Samples: 528910. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:40,145][00194] Avg episode reward: [(0, '13.145')] +[2024-09-01 15:29:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2113536. Throughput: 0: 227.3. Samples: 529906. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:45,144][00194] Avg episode reward: [(0, '13.358')] +[2024-09-01 15:29:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2117632. Throughput: 0: 215.3. Samples: 531004. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:50,148][00194] Avg episode reward: [(0, '13.130')] +[2024-09-01 15:29:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2121728. Throughput: 0: 228.3. Samples: 532010. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:29:55,141][00194] Avg episode reward: [(0, '12.993')] +[2024-09-01 15:30:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2125824. Throughput: 0: 241.3. Samples: 533622. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:00,147][00194] Avg episode reward: [(0, '12.821')] +[2024-09-01 15:30:02,181][03034] Updated weights for policy 0, policy_version 520 (0.0530) +[2024-09-01 15:30:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2129920. Throughput: 0: 223.8. Samples: 534620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:05,139][00194] Avg episode reward: [(0, '12.767')] +[2024-09-01 15:30:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2134016. Throughput: 0: 214.4. Samples: 535076. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:10,140][00194] Avg episode reward: [(0, '12.534')] +[2024-09-01 15:30:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2138112. Throughput: 0: 233.9. Samples: 536962. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:15,139][00194] Avg episode reward: [(0, '12.672')] +[2024-09-01 15:30:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2142208. Throughput: 0: 225.6. Samples: 538074. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:20,140][00194] Avg episode reward: [(0, '12.575')] +[2024-09-01 15:30:20,716][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000524_2146304.pth... +[2024-09-01 15:30:20,864][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000470_1925120.pth +[2024-09-01 15:30:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2146304. Throughput: 0: 214.1. Samples: 538544. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:25,147][00194] Avg episode reward: [(0, '12.826')] +[2024-09-01 15:30:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 2154496. Throughput: 0: 227.7. Samples: 540154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:30,139][00194] Avg episode reward: [(0, '13.402')] +[2024-09-01 15:30:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2158592. Throughput: 0: 236.4. Samples: 541642. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:35,143][00194] Avg episode reward: [(0, '13.861')] +[2024-09-01 15:30:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2162688. Throughput: 0: 227.0. Samples: 542226. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:30:40,144][00194] Avg episode reward: [(0, '13.827')] +[2024-09-01 15:30:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2166784. Throughput: 0: 214.1. Samples: 543256. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:30:45,138][00194] Avg episode reward: [(0, '14.206')] +[2024-09-01 15:30:47,657][03021] Saving new best policy, reward=14.206! +[2024-09-01 15:30:47,662][03034] Updated weights for policy 0, policy_version 530 (0.2030) +[2024-09-01 15:30:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2170880. Throughput: 0: 232.8. Samples: 545094. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:30:50,145][00194] Avg episode reward: [(0, '14.067')] +[2024-09-01 15:30:55,141][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2174976. Throughput: 0: 235.2. Samples: 545660. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:30:55,148][00194] Avg episode reward: [(0, '14.558')] +[2024-09-01 15:31:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2179072. Throughput: 0: 215.9. Samples: 546676. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:31:00,139][00194] Avg episode reward: [(0, '14.374')] +[2024-09-01 15:31:01,840][03021] Saving new best policy, reward=14.558! +[2024-09-01 15:31:05,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2183168. Throughput: 0: 230.2. Samples: 548432. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:05,138][00194] Avg episode reward: [(0, '15.056')] +[2024-09-01 15:31:09,508][03021] Saving new best policy, reward=15.056! +[2024-09-01 15:31:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2191360. Throughput: 0: 236.4. Samples: 549184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:10,144][00194] Avg episode reward: [(0, '15.075')] +[2024-09-01 15:31:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2191360. Throughput: 0: 228.0. Samples: 550412. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:15,142][00194] Avg episode reward: [(0, '14.909')] +[2024-09-01 15:31:15,359][03021] Saving new best policy, reward=15.075! +[2024-09-01 15:31:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2199552. Throughput: 0: 218.2. Samples: 551462. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:20,140][00194] Avg episode reward: [(0, '15.050')] +[2024-09-01 15:31:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2203648. Throughput: 0: 225.3. Samples: 552364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:25,139][00194] Avg episode reward: [(0, '14.934')] +[2024-09-01 15:31:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2207744. Throughput: 0: 229.3. Samples: 553574. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:30,138][00194] Avg episode reward: [(0, '15.070')] +[2024-09-01 15:31:33,184][03034] Updated weights for policy 0, policy_version 540 (0.1215) +[2024-09-01 15:31:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2211840. Throughput: 0: 216.8. Samples: 554852. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:35,139][00194] Avg episode reward: [(0, '15.499')] +[2024-09-01 15:31:37,828][03021] Saving new best policy, reward=15.499! +[2024-09-01 15:31:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2215936. Throughput: 0: 219.5. Samples: 555536. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:40,143][00194] Avg episode reward: [(0, '16.104')] +[2024-09-01 15:31:41,799][03021] Saving new best policy, reward=16.104! +[2024-09-01 15:31:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2220032. Throughput: 0: 236.4. Samples: 557316. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:45,147][00194] Avg episode reward: [(0, '16.763')] +[2024-09-01 15:31:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2224128. Throughput: 0: 220.5. Samples: 558356. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:50,144][00194] Avg episode reward: [(0, '16.703')] +[2024-09-01 15:31:51,298][03021] Saving new best policy, reward=16.763! +[2024-09-01 15:31:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2228224. Throughput: 0: 216.8. Samples: 558938. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:31:55,138][00194] Avg episode reward: [(0, '16.208')] +[2024-09-01 15:32:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2236416. Throughput: 0: 229.5. Samples: 560740. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:00,138][00194] Avg episode reward: [(0, '16.198')] +[2024-09-01 15:32:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2240512. Throughput: 0: 234.5. Samples: 562014. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:32:05,143][00194] Avg episode reward: [(0, '16.078')] +[2024-09-01 15:32:10,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 2244608. Throughput: 0: 230.2. Samples: 562722. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:32:10,146][00194] Avg episode reward: [(0, '16.188')] +[2024-09-01 15:32:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2248704. Throughput: 0: 224.6. Samples: 563680. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:32:15,143][00194] Avg episode reward: [(0, '16.064')] +[2024-09-01 15:32:17,536][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000550_2252800.pth... +[2024-09-01 15:32:17,541][03034] Updated weights for policy 0, policy_version 550 (0.0679) +[2024-09-01 15:32:17,645][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000497_2035712.pth +[2024-09-01 15:32:19,875][03021] Signal inference workers to stop experience collection... (550 times) +[2024-09-01 15:32:19,925][03034] InferenceWorker_p0-w0: stopping experience collection (550 times) +[2024-09-01 15:32:20,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2252800. Throughput: 0: 238.3. Samples: 565576. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:32:20,147][00194] Avg episode reward: [(0, '15.370')] +[2024-09-01 15:32:21,340][03021] Signal inference workers to resume experience collection... (550 times) +[2024-09-01 15:32:21,342][03034] InferenceWorker_p0-w0: resuming experience collection (550 times) +[2024-09-01 15:32:25,142][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2256896. Throughput: 0: 230.9. Samples: 565930. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:25,147][00194] Avg episode reward: [(0, '15.204')] +[2024-09-01 15:32:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2260992. Throughput: 0: 218.4. Samples: 567144. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:30,140][00194] Avg episode reward: [(0, '14.805')] +[2024-09-01 15:32:35,136][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2265088. Throughput: 0: 233.0. Samples: 568840. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:35,141][00194] Avg episode reward: [(0, '14.463')] +[2024-09-01 15:32:40,141][00194] Fps is (10 sec: 1228.1, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 2273280. Throughput: 0: 235.0. Samples: 569516. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:40,143][00194] Avg episode reward: [(0, '14.651')] +[2024-09-01 15:32:45,142][00194] Fps is (10 sec: 1228.0, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 2277376. Throughput: 0: 225.2. Samples: 570874. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:45,145][00194] Avg episode reward: [(0, '14.633')] +[2024-09-01 15:32:50,138][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2281472. Throughput: 0: 219.1. Samples: 571872. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:50,147][00194] Avg episode reward: [(0, '14.797')] +[2024-09-01 15:32:55,136][00194] Fps is (10 sec: 819.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2285568. Throughput: 0: 226.4. Samples: 572908. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:32:55,143][00194] Avg episode reward: [(0, '14.419')] +[2024-09-01 15:33:00,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2289664. Throughput: 0: 235.5. Samples: 574278. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:00,142][00194] Avg episode reward: [(0, '14.241')] +[2024-09-01 15:33:03,758][03034] Updated weights for policy 0, policy_version 560 (0.2684) +[2024-09-01 15:33:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2293760. Throughput: 0: 213.2. Samples: 575170. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:05,143][00194] Avg episode reward: [(0, '13.690')] +[2024-09-01 15:33:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2297856. Throughput: 0: 223.6. Samples: 575992. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:10,147][00194] Avg episode reward: [(0, '13.307')] +[2024-09-01 15:33:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2301952. Throughput: 0: 240.2. Samples: 577954. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:15,145][00194] Avg episode reward: [(0, '13.731')] +[2024-09-01 15:33:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2306048. Throughput: 0: 227.3. Samples: 579070. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:20,141][00194] Avg episode reward: [(0, '13.223')] +[2024-09-01 15:33:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 2310144. Throughput: 0: 221.4. Samples: 579478. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:25,144][00194] Avg episode reward: [(0, '13.440')] +[2024-09-01 15:33:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2318336. Throughput: 0: 228.3. Samples: 581144. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:30,139][00194] Avg episode reward: [(0, '13.476')] +[2024-09-01 15:33:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2322432. Throughput: 0: 237.3. Samples: 582552. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:33:35,146][00194] Avg episode reward: [(0, '13.612')] +[2024-09-01 15:33:40,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2326528. Throughput: 0: 228.6. Samples: 583194. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:33:40,145][00194] Avg episode reward: [(0, '13.687')] +[2024-09-01 15:33:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 2330624. Throughput: 0: 222.4. Samples: 584286. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:45,139][00194] Avg episode reward: [(0, '14.422')] +[2024-09-01 15:33:47,104][03034] Updated weights for policy 0, policy_version 570 (0.0054) +[2024-09-01 15:33:50,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2334720. Throughput: 0: 246.4. Samples: 586260. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:50,146][00194] Avg episode reward: [(0, '14.781')] +[2024-09-01 15:33:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2338816. Throughput: 0: 237.9. Samples: 586696. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:33:55,138][00194] Avg episode reward: [(0, '14.698')] +[2024-09-01 15:34:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2342912. Throughput: 0: 218.3. Samples: 587778. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:00,147][00194] Avg episode reward: [(0, '15.246')] +[2024-09-01 15:34:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2351104. Throughput: 0: 228.6. Samples: 589356. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:05,138][00194] Avg episode reward: [(0, '15.118')] +[2024-09-01 15:34:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2351104. Throughput: 0: 235.2. Samples: 590064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:10,139][00194] Avg episode reward: [(0, '14.992')] +[2024-09-01 15:34:15,139][00194] Fps is (10 sec: 409.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2355200. Throughput: 0: 206.6. Samples: 590442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:15,152][00194] Avg episode reward: [(0, '14.992')] +[2024-09-01 15:34:20,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2355200. Throughput: 0: 196.9. Samples: 591414. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:20,138][00194] Avg episode reward: [(0, '14.807')] +[2024-09-01 15:34:21,310][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000576_2359296.pth... +[2024-09-01 15:34:21,418][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000524_2146304.pth +[2024-09-01 15:34:25,136][00194] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2359296. Throughput: 0: 194.6. Samples: 591950. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:34:25,146][00194] Avg episode reward: [(0, '14.632')] +[2024-09-01 15:34:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 2367488. Throughput: 0: 203.6. Samples: 593446. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:34:30,138][00194] Avg episode reward: [(0, '14.973')] +[2024-09-01 15:34:35,137][00194] Fps is (10 sec: 1228.7, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2371584. Throughput: 0: 185.0. Samples: 594584. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:34:35,143][00194] Avg episode reward: [(0, '14.855')] +[2024-09-01 15:34:39,882][03034] Updated weights for policy 0, policy_version 580 (0.0564) +[2024-09-01 15:34:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2375680. Throughput: 0: 190.8. Samples: 595282. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:40,144][00194] Avg episode reward: [(0, '14.892')] +[2024-09-01 15:34:45,136][00194] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2379776. Throughput: 0: 196.6. Samples: 596624. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:45,140][00194] Avg episode reward: [(0, '15.959')] +[2024-09-01 15:34:50,139][00194] Fps is (10 sec: 818.9, 60 sec: 819.1, 300 sec: 888.6). Total num frames: 2383872. Throughput: 0: 194.1. Samples: 598090. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:50,142][00194] Avg episode reward: [(0, '16.658')] +[2024-09-01 15:34:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2387968. Throughput: 0: 189.3. Samples: 598584. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:34:55,138][00194] Avg episode reward: [(0, '16.490')] +[2024-09-01 15:35:00,136][00194] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2392064. Throughput: 0: 213.8. Samples: 600064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:35:00,146][00194] Avg episode reward: [(0, '16.349')] +[2024-09-01 15:35:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 2400256. Throughput: 0: 228.1. Samples: 601678. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:35:05,145][00194] Avg episode reward: [(0, '16.273')] +[2024-09-01 15:35:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2400256. Throughput: 0: 232.9. Samples: 602432. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:35:10,139][00194] Avg episode reward: [(0, '15.851')] +[2024-09-01 15:35:15,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2404352. Throughput: 0: 218.2. Samples: 603266. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:35:15,145][00194] Avg episode reward: [(0, '16.457')] +[2024-09-01 15:35:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2412544. Throughput: 0: 229.7. Samples: 604922. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:35:20,140][00194] Avg episode reward: [(0, '16.213')] +[2024-09-01 15:35:22,868][03034] Updated weights for policy 0, policy_version 590 (0.1754) +[2024-09-01 15:35:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2416640. Throughput: 0: 232.8. Samples: 605760. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:35:25,141][00194] Avg episode reward: [(0, '16.573')] +[2024-09-01 15:35:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2420736. Throughput: 0: 228.1. Samples: 606888. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:35:30,139][00194] Avg episode reward: [(0, '16.385')] +[2024-09-01 15:35:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2424832. Throughput: 0: 230.5. Samples: 608460. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:35:35,144][00194] Avg episode reward: [(0, '16.451')] +[2024-09-01 15:35:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2428928. Throughput: 0: 235.0. Samples: 609160. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:35:40,145][00194] Avg episode reward: [(0, '16.384')] +[2024-09-01 15:35:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2433024. Throughput: 0: 232.8. Samples: 610542. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:35:45,146][00194] Avg episode reward: [(0, '17.090')] +[2024-09-01 15:35:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2437120. Throughput: 0: 224.0. Samples: 611756. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:35:50,149][00194] Avg episode reward: [(0, '17.433')] +[2024-09-01 15:35:51,015][03021] Saving new best policy, reward=17.090! +[2024-09-01 15:35:54,856][03021] Saving new best policy, reward=17.433! +[2024-09-01 15:35:55,142][00194] Fps is (10 sec: 1228.1, 60 sec: 955.6, 300 sec: 902.5). Total num frames: 2445312. Throughput: 0: 224.5. Samples: 612538. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:35:55,151][00194] Avg episode reward: [(0, '17.449')] +[2024-09-01 15:35:58,768][03021] Saving new best policy, reward=17.449! +[2024-09-01 15:36:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2449408. Throughput: 0: 238.0. Samples: 613974. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:36:00,143][00194] Avg episode reward: [(0, '17.573')] +[2024-09-01 15:36:04,424][03021] Saving new best policy, reward=17.573! +[2024-09-01 15:36:05,136][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2453504. Throughput: 0: 225.2. Samples: 615056. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:36:05,138][00194] Avg episode reward: [(0, '17.573')] +[2024-09-01 15:36:09,370][03034] Updated weights for policy 0, policy_version 600 (0.0525) +[2024-09-01 15:36:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2457600. Throughput: 0: 223.4. Samples: 615814. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:36:10,140][00194] Avg episode reward: [(0, '17.511')] +[2024-09-01 15:36:11,675][03021] Signal inference workers to stop experience collection... (600 times) +[2024-09-01 15:36:11,733][03034] InferenceWorker_p0-w0: stopping experience collection (600 times) +[2024-09-01 15:36:13,146][03021] Signal inference workers to resume experience collection... (600 times) +[2024-09-01 15:36:13,149][03034] InferenceWorker_p0-w0: resuming experience collection (600 times) +[2024-09-01 15:36:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2461696. Throughput: 0: 227.2. Samples: 617114. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:36:15,146][00194] Avg episode reward: [(0, '17.478')] +[2024-09-01 15:36:17,116][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000602_2465792.pth... +[2024-09-01 15:36:17,234][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000550_2252800.pth +[2024-09-01 15:36:20,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2465792. Throughput: 0: 226.9. Samples: 618672. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:36:20,141][00194] Avg episode reward: [(0, '17.277')] +[2024-09-01 15:36:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2469888. Throughput: 0: 221.2. Samples: 619116. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:36:25,138][00194] Avg episode reward: [(0, '17.390')] +[2024-09-01 15:36:30,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2473984. Throughput: 0: 221.9. Samples: 620528. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:36:30,152][00194] Avg episode reward: [(0, '16.622')] +[2024-09-01 15:36:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2478080. Throughput: 0: 231.1. Samples: 622156. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:36:35,138][00194] Avg episode reward: [(0, '17.234')] +[2024-09-01 15:36:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2482176. Throughput: 0: 225.5. Samples: 622684. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:36:40,139][00194] Avg episode reward: [(0, '17.375')] +[2024-09-01 15:36:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2486272. Throughput: 0: 216.7. Samples: 623726. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:36:45,149][00194] Avg episode reward: [(0, '17.829')] +[2024-09-01 15:36:49,590][03021] Saving new best policy, reward=17.829! +[2024-09-01 15:36:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2494464. Throughput: 0: 227.0. Samples: 625272. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:36:50,138][00194] Avg episode reward: [(0, '17.889')] +[2024-09-01 15:36:53,746][03021] Saving new best policy, reward=17.889! +[2024-09-01 15:36:53,783][03034] Updated weights for policy 0, policy_version 610 (0.1219) +[2024-09-01 15:36:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2498560. Throughput: 0: 232.4. Samples: 626274. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:36:55,139][00194] Avg episode reward: [(0, '17.831')] +[2024-09-01 15:37:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2502656. Throughput: 0: 226.3. Samples: 627296. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:00,139][00194] Avg episode reward: [(0, '18.390')] +[2024-09-01 15:37:03,909][03021] Saving new best policy, reward=18.390! +[2024-09-01 15:37:05,137][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2506752. Throughput: 0: 220.3. Samples: 628584. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:05,144][00194] Avg episode reward: [(0, '18.006')] +[2024-09-01 15:37:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2510848. Throughput: 0: 227.5. Samples: 629352. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:10,146][00194] Avg episode reward: [(0, '18.789')] +[2024-09-01 15:37:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2514944. Throughput: 0: 226.4. Samples: 630718. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:15,145][00194] Avg episode reward: [(0, '18.796')] +[2024-09-01 15:37:17,807][03021] Saving new best policy, reward=18.789! +[2024-09-01 15:37:17,936][03021] Saving new best policy, reward=18.796! +[2024-09-01 15:37:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2519040. Throughput: 0: 216.6. Samples: 631904. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:20,138][00194] Avg episode reward: [(0, '18.491')] +[2024-09-01 15:37:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2523136. Throughput: 0: 218.2. Samples: 632502. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:25,138][00194] Avg episode reward: [(0, '18.033')] +[2024-09-01 15:37:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2527232. Throughput: 0: 235.9. Samples: 634340. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:30,140][00194] Avg episode reward: [(0, '18.248')] +[2024-09-01 15:37:35,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 874.8). Total num frames: 2531328. Throughput: 0: 224.7. Samples: 635384. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:35,158][00194] Avg episode reward: [(0, '18.016')] +[2024-09-01 15:37:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 2535424. Throughput: 0: 214.1. Samples: 635908. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:40,139][00194] Avg episode reward: [(0, '18.389')] +[2024-09-01 15:37:41,074][03034] Updated weights for policy 0, policy_version 620 (0.1017) +[2024-09-01 15:37:45,137][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2543616. Throughput: 0: 229.6. Samples: 637630. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:37:45,140][00194] Avg episode reward: [(0, '17.935')] +[2024-09-01 15:37:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2547712. Throughput: 0: 225.9. Samples: 638748. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:37:50,144][00194] Avg episode reward: [(0, '17.932')] +[2024-09-01 15:37:55,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2551808. Throughput: 0: 223.8. Samples: 639422. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:37:55,140][00194] Avg episode reward: [(0, '17.345')] +[2024-09-01 15:38:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2555904. Throughput: 0: 221.8. Samples: 640700. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:38:00,140][00194] Avg episode reward: [(0, '17.246')] +[2024-09-01 15:38:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2560000. Throughput: 0: 237.5. Samples: 642592. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:38:05,138][00194] Avg episode reward: [(0, '17.003')] +[2024-09-01 15:38:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2564096. Throughput: 0: 228.9. Samples: 642804. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:38:10,139][00194] Avg episode reward: [(0, '16.978')] +[2024-09-01 15:38:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2568192. Throughput: 0: 213.9. Samples: 643964. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:38:15,141][00194] Avg episode reward: [(0, '16.547')] +[2024-09-01 15:38:19,957][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000629_2576384.pth... +[2024-09-01 15:38:20,074][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000576_2359296.pth +[2024-09-01 15:38:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2576384. Throughput: 0: 228.1. Samples: 645646. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:38:20,144][00194] Avg episode reward: [(0, '16.768')] +[2024-09-01 15:38:24,865][03034] Updated weights for policy 0, policy_version 630 (0.1022) +[2024-09-01 15:38:25,137][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2580480. Throughput: 0: 237.9. Samples: 646612. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:38:25,146][00194] Avg episode reward: [(0, '16.866')] +[2024-09-01 15:38:30,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2580480. Throughput: 0: 221.6. Samples: 647600. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:38:30,146][00194] Avg episode reward: [(0, '16.842')] +[2024-09-01 15:38:35,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.8, 300 sec: 888.6). Total num frames: 2588672. Throughput: 0: 227.3. Samples: 648976. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:38:35,143][00194] Avg episode reward: [(0, '16.657')] +[2024-09-01 15:38:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2592768. Throughput: 0: 229.5. Samples: 649750. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:38:40,143][00194] Avg episode reward: [(0, '16.729')] +[2024-09-01 15:38:45,142][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2596864. Throughput: 0: 228.4. Samples: 650980. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:38:45,150][00194] Avg episode reward: [(0, '16.740')] +[2024-09-01 15:38:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2600960. Throughput: 0: 217.1. Samples: 652362. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:38:50,139][00194] Avg episode reward: [(0, '17.368')] +[2024-09-01 15:38:55,136][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2605056. Throughput: 0: 228.0. Samples: 653062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:38:55,143][00194] Avg episode reward: [(0, '17.468')] +[2024-09-01 15:39:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2609152. Throughput: 0: 240.8. Samples: 654800. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:39:00,141][00194] Avg episode reward: [(0, '17.162')] +[2024-09-01 15:39:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2613248. Throughput: 0: 224.4. Samples: 655744. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:39:05,142][00194] Avg episode reward: [(0, '17.014')] +[2024-09-01 15:39:09,782][03034] Updated weights for policy 0, policy_version 640 (0.0048) +[2024-09-01 15:39:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2621440. Throughput: 0: 219.4. Samples: 656484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:39:10,141][00194] Avg episode reward: [(0, '17.488')] +[2024-09-01 15:39:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2625536. Throughput: 0: 228.6. Samples: 657888. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:39:15,141][00194] Avg episode reward: [(0, '17.710')] +[2024-09-01 15:39:20,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 2625536. Throughput: 0: 220.0. Samples: 658876. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:39:20,141][00194] Avg episode reward: [(0, '18.127')] +[2024-09-01 15:39:25,136][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2629632. Throughput: 0: 213.5. Samples: 659356. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:39:25,145][00194] Avg episode reward: [(0, '18.380')] +[2024-09-01 15:39:30,136][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2637824. Throughput: 0: 220.2. Samples: 660886. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:39:30,145][00194] Avg episode reward: [(0, '18.273')] +[2024-09-01 15:39:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2641920. Throughput: 0: 222.2. Samples: 662362. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:39:35,144][00194] Avg episode reward: [(0, '17.777')] +[2024-09-01 15:39:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2646016. Throughput: 0: 219.1. Samples: 662922. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:39:40,139][00194] Avg episode reward: [(0, '18.493')] +[2024-09-01 15:39:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 2650112. Throughput: 0: 204.6. Samples: 664006. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:39:45,139][00194] Avg episode reward: [(0, '18.301')] +[2024-09-01 15:39:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2654208. Throughput: 0: 228.3. Samples: 666016. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:39:50,146][00194] Avg episode reward: [(0, '18.586')] +[2024-09-01 15:39:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2658304. Throughput: 0: 220.9. Samples: 666426. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:39:55,142][00194] Avg episode reward: [(0, '19.128')] +[2024-09-01 15:39:56,763][03021] Saving new best policy, reward=19.128! +[2024-09-01 15:39:56,753][03034] Updated weights for policy 0, policy_version 650 (0.1637) +[2024-09-01 15:40:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2662400. Throughput: 0: 215.0. Samples: 667564. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:40:00,138][00194] Avg episode reward: [(0, '19.077')] +[2024-09-01 15:40:00,191][03021] Signal inference workers to stop experience collection... (650 times) +[2024-09-01 15:40:00,249][03034] InferenceWorker_p0-w0: stopping experience collection (650 times) +[2024-09-01 15:40:01,517][03021] Signal inference workers to resume experience collection... (650 times) +[2024-09-01 15:40:01,519][03034] InferenceWorker_p0-w0: resuming experience collection (650 times) +[2024-09-01 15:40:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2666496. Throughput: 0: 228.1. Samples: 669140. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:40:05,138][00194] Avg episode reward: [(0, '18.654')] +[2024-09-01 15:40:10,136][00194] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 2674688. Throughput: 0: 237.8. Samples: 670056. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:40:10,143][00194] Avg episode reward: [(0, '18.396')] +[2024-09-01 15:40:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2678784. Throughput: 0: 225.3. Samples: 671024. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:40:15,141][00194] Avg episode reward: [(0, '17.993')] +[2024-09-01 15:40:19,739][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000655_2682880.pth... +[2024-09-01 15:40:19,847][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000602_2465792.pth +[2024-09-01 15:40:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2682880. Throughput: 0: 218.4. Samples: 672188. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:40:20,143][00194] Avg episode reward: [(0, '17.373')] +[2024-09-01 15:40:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2686976. Throughput: 0: 228.8. Samples: 673218. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:40:25,138][00194] Avg episode reward: [(0, '17.249')] +[2024-09-01 15:40:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2691072. Throughput: 0: 235.5. Samples: 674604. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:40:30,145][00194] Avg episode reward: [(0, '17.045')] +[2024-09-01 15:40:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2695168. Throughput: 0: 214.0. Samples: 675644. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:40:35,142][00194] Avg episode reward: [(0, '17.239')] +[2024-09-01 15:40:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2699264. Throughput: 0: 219.5. Samples: 676302. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:40:40,142][00194] Avg episode reward: [(0, '17.996')] +[2024-09-01 15:40:42,042][03034] Updated weights for policy 0, policy_version 660 (0.2248) +[2024-09-01 15:40:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2703360. Throughput: 0: 232.5. Samples: 678028. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:40:45,145][00194] Avg episode reward: [(0, '17.965')] +[2024-09-01 15:40:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2707456. Throughput: 0: 224.2. Samples: 679230. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:40:50,142][00194] Avg episode reward: [(0, '17.388')] +[2024-09-01 15:40:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2711552. Throughput: 0: 215.1. Samples: 679734. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:40:55,141][00194] Avg episode reward: [(0, '17.932')] +[2024-09-01 15:41:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2715648. Throughput: 0: 226.7. Samples: 681224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:41:00,148][00194] Avg episode reward: [(0, '17.388')] +[2024-09-01 15:41:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2719744. Throughput: 0: 228.8. Samples: 682486. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:05,138][00194] Avg episode reward: [(0, '17.630')] +[2024-09-01 15:41:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2723840. Throughput: 0: 215.4. Samples: 682910. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:10,143][00194] Avg episode reward: [(0, '17.784')] +[2024-09-01 15:41:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 2727936. Throughput: 0: 211.7. Samples: 684130. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:15,145][00194] Avg episode reward: [(0, '18.055')] +[2024-09-01 15:41:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2736128. Throughput: 0: 222.1. Samples: 685640. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:20,147][00194] Avg episode reward: [(0, '18.308')] +[2024-09-01 15:41:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2740224. Throughput: 0: 228.4. Samples: 686580. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:25,142][00194] Avg episode reward: [(0, '18.215')] +[2024-09-01 15:41:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2744320. Throughput: 0: 212.9. Samples: 687610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:30,146][00194] Avg episode reward: [(0, '17.663')] +[2024-09-01 15:41:30,168][03034] Updated weights for policy 0, policy_version 670 (0.2184) +[2024-09-01 15:41:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2748416. Throughput: 0: 219.0. Samples: 689086. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:35,144][00194] Avg episode reward: [(0, '17.081')] +[2024-09-01 15:41:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2752512. Throughput: 0: 222.7. Samples: 689756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:40,138][00194] Avg episode reward: [(0, '17.206')] +[2024-09-01 15:41:45,136][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2756608. Throughput: 0: 217.5. Samples: 691010. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:45,145][00194] Avg episode reward: [(0, '17.206')] +[2024-09-01 15:41:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2760704. Throughput: 0: 217.5. Samples: 692274. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:50,146][00194] Avg episode reward: [(0, '17.321')] +[2024-09-01 15:41:55,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2764800. Throughput: 0: 227.1. Samples: 693128. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:41:55,141][00194] Avg episode reward: [(0, '17.597')] +[2024-09-01 15:42:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2768896. Throughput: 0: 235.4. Samples: 694724. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:42:00,144][00194] Avg episode reward: [(0, '17.215')] +[2024-09-01 15:42:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2772992. Throughput: 0: 224.6. Samples: 695746. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:42:05,138][00194] Avg episode reward: [(0, '17.520')] +[2024-09-01 15:42:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2781184. Throughput: 0: 217.6. Samples: 696372. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:42:10,142][00194] Avg episode reward: [(0, '17.605')] +[2024-09-01 15:42:13,787][03034] Updated weights for policy 0, policy_version 680 (0.0583) +[2024-09-01 15:42:15,139][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2785280. Throughput: 0: 225.9. Samples: 697778. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:42:15,141][00194] Avg episode reward: [(0, '16.796')] +[2024-09-01 15:42:19,309][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000681_2789376.pth... +[2024-09-01 15:42:19,422][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000629_2576384.pth +[2024-09-01 15:42:20,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2789376. Throughput: 0: 217.9. Samples: 698890. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:42:20,141][00194] Avg episode reward: [(0, '16.508')] +[2024-09-01 15:42:25,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2793472. Throughput: 0: 219.1. Samples: 699614. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:42:25,144][00194] Avg episode reward: [(0, '17.590')] +[2024-09-01 15:42:30,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2797568. Throughput: 0: 222.2. Samples: 701008. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:42:30,143][00194] Avg episode reward: [(0, '16.690')] +[2024-09-01 15:42:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2801664. Throughput: 0: 230.4. Samples: 702642. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:42:35,139][00194] Avg episode reward: [(0, '16.547')] +[2024-09-01 15:42:40,140][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2805760. Throughput: 0: 217.7. Samples: 702926. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:42:40,146][00194] Avg episode reward: [(0, '16.577')] +[2024-09-01 15:42:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2809856. Throughput: 0: 216.4. Samples: 704460. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:42:45,138][00194] Avg episode reward: [(0, '16.622')] +[2024-09-01 15:42:50,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2813952. Throughput: 0: 231.8. Samples: 706178. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:42:50,147][00194] Avg episode reward: [(0, '16.403')] +[2024-09-01 15:42:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2818048. Throughput: 0: 228.3. Samples: 706644. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:42:55,139][00194] Avg episode reward: [(0, '16.105')] +[2024-09-01 15:43:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2822144. Throughput: 0: 222.1. Samples: 707770. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:43:00,144][00194] Avg episode reward: [(0, '16.257')] +[2024-09-01 15:43:01,308][03034] Updated weights for policy 0, policy_version 690 (0.0534) +[2024-09-01 15:43:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2830336. Throughput: 0: 231.7. Samples: 709314. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:43:05,139][00194] Avg episode reward: [(0, '16.304')] +[2024-09-01 15:43:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2834432. Throughput: 0: 238.5. Samples: 710348. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:43:10,143][00194] Avg episode reward: [(0, '16.727')] +[2024-09-01 15:43:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2838528. Throughput: 0: 231.1. Samples: 711408. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:43:15,140][00194] Avg episode reward: [(0, '17.210')] +[2024-09-01 15:43:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2842624. Throughput: 0: 219.7. Samples: 712528. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:20,138][00194] Avg episode reward: [(0, '18.390')] +[2024-09-01 15:43:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2846720. Throughput: 0: 233.8. Samples: 713446. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:25,139][00194] Avg episode reward: [(0, '18.378')] +[2024-09-01 15:43:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2850816. Throughput: 0: 234.7. Samples: 715020. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:30,145][00194] Avg episode reward: [(0, '18.375')] +[2024-09-01 15:43:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2854912. Throughput: 0: 220.0. Samples: 716076. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:35,139][00194] Avg episode reward: [(0, '18.811')] +[2024-09-01 15:43:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2859008. Throughput: 0: 222.2. Samples: 716644. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:40,147][00194] Avg episode reward: [(0, '18.913')] +[2024-09-01 15:43:44,829][03034] Updated weights for policy 0, policy_version 700 (0.2145) +[2024-09-01 15:43:45,138][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2867200. Throughput: 0: 240.0. Samples: 718572. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:45,146][00194] Avg episode reward: [(0, '19.036')] +[2024-09-01 15:43:48,737][03021] Signal inference workers to stop experience collection... (700 times) +[2024-09-01 15:43:48,830][03034] InferenceWorker_p0-w0: stopping experience collection (700 times) +[2024-09-01 15:43:49,947][03021] Signal inference workers to resume experience collection... (700 times) +[2024-09-01 15:43:49,948][03034] InferenceWorker_p0-w0: resuming experience collection (700 times) +[2024-09-01 15:43:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2871296. Throughput: 0: 228.0. Samples: 719576. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:50,139][00194] Avg episode reward: [(0, '19.069')] +[2024-09-01 15:43:55,136][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2875392. Throughput: 0: 218.9. Samples: 720200. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:43:55,139][00194] Avg episode reward: [(0, '19.063')] +[2024-09-01 15:44:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2879488. Throughput: 0: 226.7. Samples: 721610. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:44:00,139][00194] Avg episode reward: [(0, '20.121')] +[2024-09-01 15:44:02,307][03021] Saving new best policy, reward=20.121! +[2024-09-01 15:44:05,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2883584. Throughput: 0: 239.2. Samples: 723294. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:44:05,142][00194] Avg episode reward: [(0, '20.386')] +[2024-09-01 15:44:07,824][03021] Saving new best policy, reward=20.386! +[2024-09-01 15:44:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2887680. Throughput: 0: 225.6. Samples: 723600. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:44:10,144][00194] Avg episode reward: [(0, '20.153')] +[2024-09-01 15:44:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2891776. Throughput: 0: 217.6. Samples: 724810. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 15:44:15,139][00194] Avg episode reward: [(0, '20.230')] +[2024-09-01 15:44:16,410][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000707_2895872.pth... +[2024-09-01 15:44:16,520][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000655_2682880.pth +[2024-09-01 15:44:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2895872. Throughput: 0: 236.0. Samples: 726698. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 15:44:20,140][00194] Avg episode reward: [(0, '20.219')] +[2024-09-01 15:44:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2899968. Throughput: 0: 232.0. Samples: 727084. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:44:25,142][00194] Avg episode reward: [(0, '19.611')] +[2024-09-01 15:44:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2904064. Throughput: 0: 217.2. Samples: 728346. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:44:30,149][00194] Avg episode reward: [(0, '20.497')] +[2024-09-01 15:44:30,732][03034] Updated weights for policy 0, policy_version 710 (0.1086) +[2024-09-01 15:44:34,562][03021] Saving new best policy, reward=20.497! +[2024-09-01 15:44:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2912256. Throughput: 0: 226.9. Samples: 729788. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 15:44:35,143][00194] Avg episode reward: [(0, '20.699')] +[2024-09-01 15:44:38,448][03021] Saving new best policy, reward=20.699! +[2024-09-01 15:44:40,140][00194] Fps is (10 sec: 1228.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2916352. Throughput: 0: 235.6. Samples: 730804. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 15:44:40,146][00194] Avg episode reward: [(0, '20.725')] +[2024-09-01 15:44:44,411][03021] Saving new best policy, reward=20.725! +[2024-09-01 15:44:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2920448. Throughput: 0: 228.4. Samples: 731888. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 15:44:45,141][00194] Avg episode reward: [(0, '20.492')] +[2024-09-01 15:44:50,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2924544. Throughput: 0: 219.4. Samples: 733168. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:44:50,139][00194] Avg episode reward: [(0, '20.748')] +[2024-09-01 15:44:52,926][03021] Saving new best policy, reward=20.748! +[2024-09-01 15:44:55,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2928640. Throughput: 0: 229.1. Samples: 733912. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:44:55,150][00194] Avg episode reward: [(0, '21.064')] +[2024-09-01 15:44:57,026][03021] Saving new best policy, reward=21.064! +[2024-09-01 15:45:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2932736. Throughput: 0: 233.4. Samples: 735312. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:00,142][00194] Avg episode reward: [(0, '20.611')] +[2024-09-01 15:45:05,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2936832. Throughput: 0: 216.8. Samples: 736456. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:05,140][00194] Avg episode reward: [(0, '20.114')] +[2024-09-01 15:45:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2940928. Throughput: 0: 223.9. Samples: 737160. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:10,141][00194] Avg episode reward: [(0, '19.898')] +[2024-09-01 15:45:14,918][03034] Updated weights for policy 0, policy_version 720 (0.0056) +[2024-09-01 15:45:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2949120. Throughput: 0: 237.5. Samples: 739032. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:15,139][00194] Avg episode reward: [(0, '20.473')] +[2024-09-01 15:45:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2949120. Throughput: 0: 228.0. Samples: 740048. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:20,139][00194] Avg episode reward: [(0, '20.979')] +[2024-09-01 15:45:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2957312. Throughput: 0: 217.4. Samples: 740584. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:25,140][00194] Avg episode reward: [(0, '20.871')] +[2024-09-01 15:45:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2961408. Throughput: 0: 226.6. Samples: 742084. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:30,138][00194] Avg episode reward: [(0, '21.252')] +[2024-09-01 15:45:32,582][03021] Saving new best policy, reward=21.252! +[2024-09-01 15:45:35,139][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 2965504. Throughput: 0: 230.3. Samples: 743532. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:35,143][00194] Avg episode reward: [(0, '21.427')] +[2024-09-01 15:45:38,651][03021] Saving new best policy, reward=21.427! +[2024-09-01 15:45:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2969600. Throughput: 0: 225.8. Samples: 744070. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 15:45:40,138][00194] Avg episode reward: [(0, '21.569')] +[2024-09-01 15:45:42,848][03021] Saving new best policy, reward=21.569! +[2024-09-01 15:45:45,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2973696. Throughput: 0: 223.2. Samples: 745354. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:45:45,144][00194] Avg episode reward: [(0, '21.570')] +[2024-09-01 15:45:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2977792. Throughput: 0: 236.5. Samples: 747098. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:45:50,142][00194] Avg episode reward: [(0, '21.226')] +[2024-09-01 15:45:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2981888. Throughput: 0: 230.2. Samples: 747520. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:45:55,142][00194] Avg episode reward: [(0, '21.167')] +[2024-09-01 15:46:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2985984. Throughput: 0: 214.4. Samples: 748678. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:00,144][00194] Avg episode reward: [(0, '21.265')] +[2024-09-01 15:46:00,792][03034] Updated weights for policy 0, policy_version 730 (0.0059) +[2024-09-01 15:46:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2994176. Throughput: 0: 225.2. Samples: 750184. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:46:05,148][00194] Avg episode reward: [(0, '21.036')] +[2024-09-01 15:46:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 2998272. Throughput: 0: 235.3. Samples: 751172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:10,143][00194] Avg episode reward: [(0, '21.323')] +[2024-09-01 15:46:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3002368. Throughput: 0: 223.9. Samples: 752158. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:15,138][00194] Avg episode reward: [(0, '22.063')] +[2024-09-01 15:46:18,718][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000734_3006464.pth... +[2024-09-01 15:46:18,823][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000681_2789376.pth +[2024-09-01 15:46:18,834][03021] Saving new best policy, reward=22.063! +[2024-09-01 15:46:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3006464. Throughput: 0: 224.1. Samples: 753616. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:20,145][00194] Avg episode reward: [(0, '21.768')] +[2024-09-01 15:46:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3010560. Throughput: 0: 226.8. Samples: 754274. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:25,142][00194] Avg episode reward: [(0, '21.474')] +[2024-09-01 15:46:30,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3014656. Throughput: 0: 231.1. Samples: 755754. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:30,143][00194] Avg episode reward: [(0, '21.770')] +[2024-09-01 15:46:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3018752. Throughput: 0: 218.1. Samples: 756912. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:35,138][00194] Avg episode reward: [(0, '21.529')] +[2024-09-01 15:46:40,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3022848. Throughput: 0: 225.1. Samples: 757648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:46:40,147][00194] Avg episode reward: [(0, '22.132')] +[2024-09-01 15:46:44,223][03021] Saving new best policy, reward=22.132! +[2024-09-01 15:46:44,235][03034] Updated weights for policy 0, policy_version 740 (0.1655) +[2024-09-01 15:46:45,144][00194] Fps is (10 sec: 1227.7, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 3031040. Throughput: 0: 238.3. Samples: 759402. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:46:45,149][00194] Avg episode reward: [(0, '21.245')] +[2024-09-01 15:46:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3035136. Throughput: 0: 226.7. Samples: 760386. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:46:50,138][00194] Avg episode reward: [(0, '21.317')] +[2024-09-01 15:46:55,136][00194] Fps is (10 sec: 819.9, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3039232. Throughput: 0: 215.9. Samples: 760886. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:46:55,139][00194] Avg episode reward: [(0, '21.480')] +[2024-09-01 15:47:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3043328. Throughput: 0: 229.9. Samples: 762502. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:47:00,148][00194] Avg episode reward: [(0, '20.783')] +[2024-09-01 15:47:05,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3047424. Throughput: 0: 229.4. Samples: 763938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:47:05,140][00194] Avg episode reward: [(0, '20.889')] +[2024-09-01 15:47:10,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3051520. Throughput: 0: 224.8. Samples: 764392. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:47:10,145][00194] Avg episode reward: [(0, '20.672')] +[2024-09-01 15:47:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3055616. Throughput: 0: 226.2. Samples: 765932. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:47:15,144][00194] Avg episode reward: [(0, '19.423')] +[2024-09-01 15:47:20,136][00194] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3063808. Throughput: 0: 234.4. Samples: 767462. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:47:20,138][00194] Avg episode reward: [(0, '19.466')] +[2024-09-01 15:47:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3063808. Throughput: 0: 233.9. Samples: 768174. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:47:25,138][00194] Avg episode reward: [(0, '19.382')] +[2024-09-01 15:47:30,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3067904. Throughput: 0: 218.4. Samples: 769230. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:47:30,139][00194] Avg episode reward: [(0, '19.509')] +[2024-09-01 15:47:30,957][03034] Updated weights for policy 0, policy_version 750 (0.2263) +[2024-09-01 15:47:33,253][03021] Signal inference workers to stop experience collection... (750 times) +[2024-09-01 15:47:33,307][03034] InferenceWorker_p0-w0: stopping experience collection (750 times) +[2024-09-01 15:47:34,199][03021] Signal inference workers to resume experience collection... (750 times) +[2024-09-01 15:47:34,201][03034] InferenceWorker_p0-w0: resuming experience collection (750 times) +[2024-09-01 15:47:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3076096. Throughput: 0: 226.4. Samples: 770576. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:47:35,138][00194] Avg episode reward: [(0, '19.567')] +[2024-09-01 15:47:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3080192. Throughput: 0: 236.5. Samples: 771530. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:47:40,146][00194] Avg episode reward: [(0, '19.152')] +[2024-09-01 15:47:45,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 3084288. Throughput: 0: 225.1. Samples: 772634. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:47:45,140][00194] Avg episode reward: [(0, '19.631')] +[2024-09-01 15:47:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3088384. Throughput: 0: 223.1. Samples: 773978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:47:50,143][00194] Avg episode reward: [(0, '19.936')] +[2024-09-01 15:47:55,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3092480. Throughput: 0: 231.4. Samples: 774804. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:47:55,147][00194] Avg episode reward: [(0, '19.917')] +[2024-09-01 15:48:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3096576. Throughput: 0: 232.5. Samples: 776396. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:48:00,138][00194] Avg episode reward: [(0, '19.718')] +[2024-09-01 15:48:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3100672. Throughput: 0: 222.0. Samples: 777450. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:48:05,139][00194] Avg episode reward: [(0, '19.588')] +[2024-09-01 15:48:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 3108864. Throughput: 0: 223.5. Samples: 778230. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:48:10,139][00194] Avg episode reward: [(0, '19.439')] +[2024-09-01 15:48:13,726][03034] Updated weights for policy 0, policy_version 760 (0.1012) +[2024-09-01 15:48:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3112960. Throughput: 0: 233.4. Samples: 779732. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:48:15,138][00194] Avg episode reward: [(0, '19.651')] +[2024-09-01 15:48:18,866][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000761_3117056.pth... +[2024-09-01 15:48:18,957][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000707_2895872.pth +[2024-09-01 15:48:20,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3117056. Throughput: 0: 228.6. Samples: 780862. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:48:20,148][00194] Avg episode reward: [(0, '19.440')] +[2024-09-01 15:48:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3121152. Throughput: 0: 223.7. Samples: 781596. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:48:25,139][00194] Avg episode reward: [(0, '19.644')] +[2024-09-01 15:48:30,136][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3125248. Throughput: 0: 232.1. Samples: 783078. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:48:30,138][00194] Avg episode reward: [(0, '20.063')] +[2024-09-01 15:48:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3129344. Throughput: 0: 232.0. Samples: 784416. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:48:35,145][00194] Avg episode reward: [(0, '20.287')] +[2024-09-01 15:48:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3133440. Throughput: 0: 226.0. Samples: 784972. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:48:40,140][00194] Avg episode reward: [(0, '19.418')] +[2024-09-01 15:48:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3137536. Throughput: 0: 224.2. Samples: 786486. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:48:45,142][00194] Avg episode reward: [(0, '19.444')] +[2024-09-01 15:48:50,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3145728. Throughput: 0: 232.2. Samples: 787900. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:48:50,140][00194] Avg episode reward: [(0, '19.318')] +[2024-09-01 15:48:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3145728. Throughput: 0: 229.4. Samples: 788552. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:48:55,140][00194] Avg episode reward: [(0, '19.341')] +[2024-09-01 15:49:00,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3149824. Throughput: 0: 222.4. Samples: 789742. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:49:00,148][00194] Avg episode reward: [(0, '19.888')] +[2024-09-01 15:49:00,843][03034] Updated weights for policy 0, policy_version 770 (0.2036) +[2024-09-01 15:49:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3158016. Throughput: 0: 225.8. Samples: 791024. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:49:05,138][00194] Avg episode reward: [(0, '20.481')] +[2024-09-01 15:49:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3162112. Throughput: 0: 230.9. Samples: 791988. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:49:10,139][00194] Avg episode reward: [(0, '21.718')] +[2024-09-01 15:49:15,141][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3166208. Throughput: 0: 222.5. Samples: 793090. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:49:15,144][00194] Avg episode reward: [(0, '22.134')] +[2024-09-01 15:49:18,338][03021] Saving new best policy, reward=22.134! +[2024-09-01 15:49:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3170304. Throughput: 0: 225.4. Samples: 794558. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:49:20,139][00194] Avg episode reward: [(0, '21.760')] +[2024-09-01 15:49:25,137][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3174400. Throughput: 0: 228.4. Samples: 795248. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:49:25,139][00194] Avg episode reward: [(0, '22.142')] +[2024-09-01 15:49:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3178496. Throughput: 0: 229.4. Samples: 796810. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:49:30,142][00194] Avg episode reward: [(0, '22.131')] +[2024-09-01 15:49:31,648][03021] Saving new best policy, reward=22.142! +[2024-09-01 15:49:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3182592. Throughput: 0: 221.9. Samples: 797884. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 15:49:35,139][00194] Avg episode reward: [(0, '22.599')] +[2024-09-01 15:49:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3186688. Throughput: 0: 224.4. Samples: 798648. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:49:40,147][00194] Avg episode reward: [(0, '22.329')] +[2024-09-01 15:49:40,183][03021] Saving new best policy, reward=22.599! +[2024-09-01 15:49:44,449][03034] Updated weights for policy 0, policy_version 780 (0.1073) +[2024-09-01 15:49:45,138][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3194880. Throughput: 0: 231.7. Samples: 800170. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:49:45,141][00194] Avg episode reward: [(0, '22.209')] +[2024-09-01 15:49:50,136][00194] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3198976. Throughput: 0: 224.9. Samples: 801146. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:49:50,147][00194] Avg episode reward: [(0, '22.078')] +[2024-09-01 15:49:55,136][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3203072. Throughput: 0: 219.7. Samples: 801876. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:49:55,138][00194] Avg episode reward: [(0, '21.512')] +[2024-09-01 15:50:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3207168. Throughput: 0: 226.1. Samples: 803262. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:00,146][00194] Avg episode reward: [(0, '21.933')] +[2024-09-01 15:50:05,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3211264. Throughput: 0: 227.3. Samples: 804786. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:05,144][00194] Avg episode reward: [(0, '22.340')] +[2024-09-01 15:50:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3215360. Throughput: 0: 221.6. Samples: 805222. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:10,140][00194] Avg episode reward: [(0, '22.113')] +[2024-09-01 15:50:15,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3219456. Throughput: 0: 214.0. Samples: 806438. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:15,144][00194] Avg episode reward: [(0, '22.157')] +[2024-09-01 15:50:16,523][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000787_3223552.pth... +[2024-09-01 15:50:16,635][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000734_3006464.pth +[2024-09-01 15:50:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3223552. Throughput: 0: 232.2. Samples: 808334. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:20,145][00194] Avg episode reward: [(0, '22.172')] +[2024-09-01 15:50:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3227648. Throughput: 0: 223.5. Samples: 808706. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:25,143][00194] Avg episode reward: [(0, '22.708')] +[2024-09-01 15:50:26,192][03021] Saving new best policy, reward=22.708! +[2024-09-01 15:50:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3231744. Throughput: 0: 217.7. Samples: 809964. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:30,148][00194] Avg episode reward: [(0, '22.729')] +[2024-09-01 15:50:31,092][03034] Updated weights for policy 0, policy_version 790 (0.1658) +[2024-09-01 15:50:34,963][03021] Saving new best policy, reward=22.729! +[2024-09-01 15:50:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3239936. Throughput: 0: 230.4. Samples: 811516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:35,140][00194] Avg episode reward: [(0, '22.713')] +[2024-09-01 15:50:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3244032. Throughput: 0: 236.8. Samples: 812534. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:40,142][00194] Avg episode reward: [(0, '22.768')] +[2024-09-01 15:50:44,838][03021] Saving new best policy, reward=22.768! +[2024-09-01 15:50:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3248128. Throughput: 0: 227.6. Samples: 813502. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:50:45,142][00194] Avg episode reward: [(0, '22.931')] +[2024-09-01 15:50:49,345][03021] Saving new best policy, reward=22.931! +[2024-09-01 15:50:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3252224. Throughput: 0: 219.9. Samples: 814680. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:50:50,145][00194] Avg episode reward: [(0, '22.894')] +[2024-09-01 15:50:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3256320. Throughput: 0: 230.9. Samples: 815614. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:50:55,138][00194] Avg episode reward: [(0, '22.140')] +[2024-09-01 15:51:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3260416. Throughput: 0: 233.3. Samples: 816938. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:00,144][00194] Avg episode reward: [(0, '21.306')] +[2024-09-01 15:51:05,140][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3264512. Throughput: 0: 215.8. Samples: 818048. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:05,142][00194] Avg episode reward: [(0, '21.112')] +[2024-09-01 15:51:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3268608. Throughput: 0: 223.4. Samples: 818760. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:10,148][00194] Avg episode reward: [(0, '21.764')] +[2024-09-01 15:51:15,040][03034] Updated weights for policy 0, policy_version 800 (0.0701) +[2024-09-01 15:51:15,136][00194] Fps is (10 sec: 1229.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3276800. Throughput: 0: 239.1. Samples: 820724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:51:15,140][00194] Avg episode reward: [(0, '21.618')] +[2024-09-01 15:51:18,744][03021] Signal inference workers to stop experience collection... (800 times) +[2024-09-01 15:51:18,854][03034] InferenceWorker_p0-w0: stopping experience collection (800 times) +[2024-09-01 15:51:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3276800. Throughput: 0: 225.5. Samples: 821662. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:51:20,139][00194] Avg episode reward: [(0, '20.990')] +[2024-09-01 15:51:20,683][03021] Signal inference workers to resume experience collection... (800 times) +[2024-09-01 15:51:20,684][03034] InferenceWorker_p0-w0: resuming experience collection (800 times) +[2024-09-01 15:51:25,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3280896. Throughput: 0: 213.0. Samples: 822118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:51:25,151][00194] Avg episode reward: [(0, '20.894')] +[2024-09-01 15:51:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3289088. Throughput: 0: 227.7. Samples: 823750. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:30,138][00194] Avg episode reward: [(0, '21.078')] +[2024-09-01 15:51:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3293184. Throughput: 0: 233.4. Samples: 825184. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:51:35,143][00194] Avg episode reward: [(0, '21.050')] +[2024-09-01 15:51:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3297280. Throughput: 0: 225.6. Samples: 825768. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 15:51:40,139][00194] Avg episode reward: [(0, '21.044')] +[2024-09-01 15:51:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3301376. Throughput: 0: 221.7. Samples: 826916. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:45,139][00194] Avg episode reward: [(0, '21.593')] +[2024-09-01 15:51:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3305472. Throughput: 0: 242.6. Samples: 828966. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:50,145][00194] Avg episode reward: [(0, '21.868')] +[2024-09-01 15:51:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3309568. Throughput: 0: 235.1. Samples: 829338. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:51:55,141][00194] Avg episode reward: [(0, '21.348')] +[2024-09-01 15:52:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3313664. Throughput: 0: 215.9. Samples: 830438. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:52:00,142][00194] Avg episode reward: [(0, '21.471')] +[2024-09-01 15:52:01,626][03034] Updated weights for policy 0, policy_version 810 (0.1549) +[2024-09-01 15:52:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 3321856. Throughput: 0: 230.6. Samples: 832038. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:05,139][00194] Avg episode reward: [(0, '21.186')] +[2024-09-01 15:52:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3325952. Throughput: 0: 243.1. Samples: 833058. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:10,144][00194] Avg episode reward: [(0, '21.748')] +[2024-09-01 15:52:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3330048. Throughput: 0: 228.8. Samples: 834046. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:15,142][00194] Avg episode reward: [(0, '21.807')] +[2024-09-01 15:52:19,027][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000814_3334144.pth... +[2024-09-01 15:52:19,134][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000761_3117056.pth +[2024-09-01 15:52:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3334144. Throughput: 0: 226.2. Samples: 835364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:20,140][00194] Avg episode reward: [(0, '21.584')] +[2024-09-01 15:52:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3338240. Throughput: 0: 231.3. Samples: 836176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:25,150][00194] Avg episode reward: [(0, '20.947')] +[2024-09-01 15:52:30,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3342336. Throughput: 0: 235.4. Samples: 837508. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:52:30,142][00194] Avg episode reward: [(0, '21.145')] +[2024-09-01 15:52:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3346432. Throughput: 0: 216.8. Samples: 838722. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:52:35,138][00194] Avg episode reward: [(0, '21.075')] +[2024-09-01 15:52:40,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3350528. Throughput: 0: 224.1. Samples: 839422. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:52:40,147][00194] Avg episode reward: [(0, '20.888')] +[2024-09-01 15:52:44,980][03034] Updated weights for policy 0, policy_version 820 (0.1016) +[2024-09-01 15:52:45,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3358720. Throughput: 0: 238.8. Samples: 841186. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:45,139][00194] Avg episode reward: [(0, '21.004')] +[2024-09-01 15:52:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3358720. Throughput: 0: 226.3. Samples: 842220. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:50,139][00194] Avg episode reward: [(0, '20.840')] +[2024-09-01 15:52:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3366912. Throughput: 0: 217.8. Samples: 842858. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:52:55,141][00194] Avg episode reward: [(0, '20.895')] +[2024-09-01 15:53:00,136][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3371008. Throughput: 0: 228.1. Samples: 844312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:00,139][00194] Avg episode reward: [(0, '21.330')] +[2024-09-01 15:53:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3375104. Throughput: 0: 228.2. Samples: 845632. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:05,143][00194] Avg episode reward: [(0, '21.007')] +[2024-09-01 15:53:10,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3379200. Throughput: 0: 224.8. Samples: 846290. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:10,139][00194] Avg episode reward: [(0, '20.868')] +[2024-09-01 15:53:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3383296. Throughput: 0: 224.8. Samples: 847624. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:15,138][00194] Avg episode reward: [(0, '21.265')] +[2024-09-01 15:53:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3387392. Throughput: 0: 235.4. Samples: 849314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:20,140][00194] Avg episode reward: [(0, '20.929')] +[2024-09-01 15:53:25,142][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3391488. Throughput: 0: 229.9. Samples: 849768. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:53:25,145][00194] Avg episode reward: [(0, '21.351')] +[2024-09-01 15:53:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3395584. Throughput: 0: 216.4. Samples: 850922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:53:30,149][00194] Avg episode reward: [(0, '21.766')] +[2024-09-01 15:53:30,761][03034] Updated weights for policy 0, policy_version 830 (0.2096) +[2024-09-01 15:53:35,136][00194] Fps is (10 sec: 1229.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3403776. Throughput: 0: 227.9. Samples: 852476. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:53:35,146][00194] Avg episode reward: [(0, '21.151')] +[2024-09-01 15:53:40,137][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3407872. Throughput: 0: 236.3. Samples: 853494. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:40,140][00194] Avg episode reward: [(0, '21.429')] +[2024-09-01 15:53:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3411968. Throughput: 0: 227.2. Samples: 854534. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:53:45,144][00194] Avg episode reward: [(0, '21.514')] +[2024-09-01 15:53:50,136][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3416064. Throughput: 0: 226.6. Samples: 855828. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:53:50,139][00194] Avg episode reward: [(0, '21.650')] +[2024-09-01 15:53:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3420160. Throughput: 0: 232.0. Samples: 856730. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:53:55,144][00194] Avg episode reward: [(0, '21.326')] +[2024-09-01 15:54:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3424256. Throughput: 0: 231.4. Samples: 858036. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:54:00,144][00194] Avg episode reward: [(0, '21.083')] +[2024-09-01 15:54:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3428352. Throughput: 0: 222.5. Samples: 859326. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:54:05,138][00194] Avg episode reward: [(0, '21.089')] +[2024-09-01 15:54:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3432448. Throughput: 0: 227.6. Samples: 860010. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:10,146][00194] Avg episode reward: [(0, '20.824')] +[2024-09-01 15:54:14,856][03034] Updated weights for policy 0, policy_version 840 (0.1524) +[2024-09-01 15:54:15,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3440640. Throughput: 0: 240.3. Samples: 861734. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:15,139][00194] Avg episode reward: [(0, '21.211')] +[2024-09-01 15:54:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3440640. Throughput: 0: 227.5. Samples: 862714. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:20,144][00194] Avg episode reward: [(0, '21.716')] +[2024-09-01 15:54:20,352][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000841_3444736.pth... +[2024-09-01 15:54:20,468][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000787_3223552.pth +[2024-09-01 15:54:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 3448832. Throughput: 0: 226.2. Samples: 863674. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:25,139][00194] Avg episode reward: [(0, '21.447')] +[2024-09-01 15:54:30,137][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3452928. Throughput: 0: 229.6. Samples: 864866. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:30,140][00194] Avg episode reward: [(0, '21.911')] +[2024-09-01 15:54:35,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3457024. Throughput: 0: 230.3. Samples: 866194. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:35,139][00194] Avg episode reward: [(0, '21.879')] +[2024-09-01 15:54:40,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3461120. Throughput: 0: 224.9. Samples: 866850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:54:40,141][00194] Avg episode reward: [(0, '21.792')] +[2024-09-01 15:54:45,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3465216. Throughput: 0: 226.0. Samples: 868208. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:54:45,139][00194] Avg episode reward: [(0, '21.688')] +[2024-09-01 15:54:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3469312. Throughput: 0: 235.2. Samples: 869912. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:54:50,139][00194] Avg episode reward: [(0, '21.989')] +[2024-09-01 15:54:55,138][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3473408. Throughput: 0: 225.6. Samples: 870164. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:54:55,140][00194] Avg episode reward: [(0, '21.854')] +[2024-09-01 15:55:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3477504. Throughput: 0: 223.3. Samples: 871784. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:00,139][00194] Avg episode reward: [(0, '22.389')] +[2024-09-01 15:55:00,217][03034] Updated weights for policy 0, policy_version 850 (0.1723) +[2024-09-01 15:55:02,590][03021] Signal inference workers to stop experience collection... (850 times) +[2024-09-01 15:55:02,648][03034] InferenceWorker_p0-w0: stopping experience collection (850 times) +[2024-09-01 15:55:03,981][03021] Signal inference workers to resume experience collection... (850 times) +[2024-09-01 15:55:03,983][03034] InferenceWorker_p0-w0: resuming experience collection (850 times) +[2024-09-01 15:55:05,136][00194] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3485696. Throughput: 0: 228.7. Samples: 873006. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:05,141][00194] Avg episode reward: [(0, '21.823')] +[2024-09-01 15:55:10,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3489792. Throughput: 0: 227.0. Samples: 873890. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:10,140][00194] Avg episode reward: [(0, '21.858')] +[2024-09-01 15:55:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3493888. Throughput: 0: 226.2. Samples: 875046. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:15,144][00194] Avg episode reward: [(0, '21.996')] +[2024-09-01 15:55:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3497984. Throughput: 0: 231.9. Samples: 876628. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:20,144][00194] Avg episode reward: [(0, '21.823')] +[2024-09-01 15:55:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3502080. Throughput: 0: 232.5. Samples: 877312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:25,139][00194] Avg episode reward: [(0, '22.289')] +[2024-09-01 15:55:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3506176. Throughput: 0: 226.8. Samples: 878412. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:55:30,144][00194] Avg episode reward: [(0, '22.016')] +[2024-09-01 15:55:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3510272. Throughput: 0: 223.4. Samples: 879966. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:55:35,139][00194] Avg episode reward: [(0, '22.156')] +[2024-09-01 15:55:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3518464. Throughput: 0: 233.2. Samples: 880656. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:55:40,141][00194] Avg episode reward: [(0, '22.198')] +[2024-09-01 15:55:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3518464. Throughput: 0: 230.8. Samples: 882170. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:55:45,139][00194] Avg episode reward: [(0, '21.941')] +[2024-09-01 15:55:45,174][03034] Updated weights for policy 0, policy_version 860 (0.0529) +[2024-09-01 15:55:50,136][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3522560. Throughput: 0: 227.2. Samples: 883228. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:50,144][00194] Avg episode reward: [(0, '22.074')] +[2024-09-01 15:55:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 3530752. Throughput: 0: 223.4. Samples: 883944. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:55:55,138][00194] Avg episode reward: [(0, '22.073')] +[2024-09-01 15:56:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3534848. Throughput: 0: 228.3. Samples: 885318. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:00,144][00194] Avg episode reward: [(0, '22.349')] +[2024-09-01 15:56:05,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3538944. Throughput: 0: 222.0. Samples: 886618. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:05,143][00194] Avg episode reward: [(0, '22.816')] +[2024-09-01 15:56:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3543040. Throughput: 0: 222.9. Samples: 887342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:10,143][00194] Avg episode reward: [(0, '23.400')] +[2024-09-01 15:56:12,309][03021] Saving new best policy, reward=23.400! +[2024-09-01 15:56:15,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3547136. Throughput: 0: 232.2. Samples: 888860. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:15,146][00194] Avg episode reward: [(0, '23.841')] +[2024-09-01 15:56:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3551232. Throughput: 0: 231.4. Samples: 890380. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:20,141][00194] Avg episode reward: [(0, '23.955')] +[2024-09-01 15:56:21,064][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000868_3555328.pth... +[2024-09-01 15:56:21,245][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000814_3334144.pth +[2024-09-01 15:56:21,269][03021] Saving new best policy, reward=23.841! +[2024-09-01 15:56:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3555328. Throughput: 0: 223.7. Samples: 890724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:25,140][00194] Avg episode reward: [(0, '24.086')] +[2024-09-01 15:56:26,449][03021] Saving new best policy, reward=23.955! +[2024-09-01 15:56:26,578][03021] Saving new best policy, reward=24.086! +[2024-09-01 15:56:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3559424. Throughput: 0: 223.8. Samples: 892242. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:30,138][00194] Avg episode reward: [(0, '23.894')] +[2024-09-01 15:56:30,884][03034] Updated weights for policy 0, policy_version 870 (0.1527) +[2024-09-01 15:56:35,143][00194] Fps is (10 sec: 1227.9, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 3567616. Throughput: 0: 228.1. Samples: 893494. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:35,151][00194] Avg episode reward: [(0, '24.334')] +[2024-09-01 15:56:39,670][03021] Saving new best policy, reward=24.334! +[2024-09-01 15:56:40,141][00194] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3571712. Throughput: 0: 227.5. Samples: 894184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:40,144][00194] Avg episode reward: [(0, '24.897')] +[2024-09-01 15:56:44,827][03021] Saving new best policy, reward=24.897! +[2024-09-01 15:56:45,136][00194] Fps is (10 sec: 819.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3575808. Throughput: 0: 224.9. Samples: 895438. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:45,138][00194] Avg episode reward: [(0, '24.088')] +[2024-09-01 15:56:50,136][00194] Fps is (10 sec: 819.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3579904. Throughput: 0: 229.5. Samples: 896944. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:50,139][00194] Avg episode reward: [(0, '23.959')] +[2024-09-01 15:56:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3584000. Throughput: 0: 228.2. Samples: 897610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:56:55,146][00194] Avg episode reward: [(0, '24.016')] +[2024-09-01 15:57:00,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3588096. Throughput: 0: 218.5. Samples: 898694. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:57:00,150][00194] Avg episode reward: [(0, '23.223')] +[2024-09-01 15:57:05,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3592192. Throughput: 0: 219.6. Samples: 900264. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:57:05,146][00194] Avg episode reward: [(0, '23.286')] +[2024-09-01 15:57:10,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3596288. Throughput: 0: 222.5. Samples: 900738. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:57:10,148][00194] Avg episode reward: [(0, '23.471')] +[2024-09-01 15:57:15,145][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3600384. Throughput: 0: 227.9. Samples: 902498. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:57:15,148][00194] Avg episode reward: [(0, '23.546')] +[2024-09-01 15:57:16,193][03034] Updated weights for policy 0, policy_version 880 (0.1056) +[2024-09-01 15:57:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3604480. Throughput: 0: 225.2. Samples: 903626. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:57:20,138][00194] Avg episode reward: [(0, '23.941')] +[2024-09-01 15:57:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3612672. Throughput: 0: 228.4. Samples: 904460. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:57:25,146][00194] Avg episode reward: [(0, '23.880')] +[2024-09-01 15:57:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3616768. Throughput: 0: 232.1. Samples: 905882. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:57:30,138][00194] Avg episode reward: [(0, '23.270')] +[2024-09-01 15:57:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 3620864. Throughput: 0: 222.6. Samples: 906962. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:57:35,138][00194] Avg episode reward: [(0, '23.261')] +[2024-09-01 15:57:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3624960. Throughput: 0: 224.9. Samples: 907730. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:57:40,141][00194] Avg episode reward: [(0, '23.318')] +[2024-09-01 15:57:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3629056. Throughput: 0: 235.6. Samples: 909294. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:57:45,145][00194] Avg episode reward: [(0, '22.652')] +[2024-09-01 15:57:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3633152. Throughput: 0: 235.8. Samples: 910876. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:57:50,144][00194] Avg episode reward: [(0, '22.464')] +[2024-09-01 15:57:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3637248. Throughput: 0: 227.6. Samples: 910982. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:57:55,141][00194] Avg episode reward: [(0, '21.841')] +[2024-09-01 15:58:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3641344. Throughput: 0: 226.2. Samples: 912676. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:58:00,149][00194] Avg episode reward: [(0, '21.566')] +[2024-09-01 15:58:00,665][03034] Updated weights for policy 0, policy_version 890 (0.1654) +[2024-09-01 15:58:05,138][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3649536. Throughput: 0: 229.5. Samples: 913952. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:58:05,141][00194] Avg episode reward: [(0, '21.470')] +[2024-09-01 15:58:10,138][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3653632. Throughput: 0: 226.7. Samples: 914660. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:58:10,140][00194] Avg episode reward: [(0, '20.831')] +[2024-09-01 15:58:15,136][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3657728. Throughput: 0: 225.2. Samples: 916018. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:58:15,138][00194] Avg episode reward: [(0, '20.667')] +[2024-09-01 15:58:18,419][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000894_3661824.pth... +[2024-09-01 15:58:18,539][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000841_3444736.pth +[2024-09-01 15:58:20,136][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3661824. Throughput: 0: 235.7. Samples: 917570. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:58:20,138][00194] Avg episode reward: [(0, '21.120')] +[2024-09-01 15:58:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3665920. Throughput: 0: 233.9. Samples: 918254. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:58:25,139][00194] Avg episode reward: [(0, '20.924')] +[2024-09-01 15:58:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3670016. Throughput: 0: 222.7. Samples: 919316. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:58:30,142][00194] Avg episode reward: [(0, '21.131')] +[2024-09-01 15:58:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3674112. Throughput: 0: 223.5. Samples: 920932. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:58:35,139][00194] Avg episode reward: [(0, '20.997')] +[2024-09-01 15:58:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3678208. Throughput: 0: 236.2. Samples: 921612. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:58:40,139][00194] Avg episode reward: [(0, '21.241')] +[2024-09-01 15:58:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3682304. Throughput: 0: 231.7. Samples: 923102. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:58:45,138][00194] Avg episode reward: [(0, '21.549')] +[2024-09-01 15:58:45,651][03034] Updated weights for policy 0, policy_version 900 (0.0575) +[2024-09-01 15:58:49,322][03021] Signal inference workers to stop experience collection... (900 times) +[2024-09-01 15:58:49,396][03034] InferenceWorker_p0-w0: stopping experience collection (900 times) +[2024-09-01 15:58:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3686400. Throughput: 0: 226.9. Samples: 924160. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 15:58:50,141][00194] Avg episode reward: [(0, '21.557')] +[2024-09-01 15:58:50,492][03021] Signal inference workers to resume experience collection... (900 times) +[2024-09-01 15:58:50,493][03034] InferenceWorker_p0-w0: resuming experience collection (900 times) +[2024-09-01 15:58:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3694592. Throughput: 0: 230.4. Samples: 925028. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:58:55,145][00194] Avg episode reward: [(0, '22.134')] +[2024-09-01 15:59:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3698688. Throughput: 0: 230.0. Samples: 926366. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:59:00,140][00194] Avg episode reward: [(0, '21.879')] +[2024-09-01 15:59:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3702784. Throughput: 0: 218.3. Samples: 927392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:59:05,139][00194] Avg episode reward: [(0, '21.876')] +[2024-09-01 15:59:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3706880. Throughput: 0: 222.0. Samples: 928244. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:59:10,144][00194] Avg episode reward: [(0, '21.649')] +[2024-09-01 15:59:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3710976. Throughput: 0: 231.6. Samples: 929736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:59:15,148][00194] Avg episode reward: [(0, '21.283')] +[2024-09-01 15:59:20,141][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3715072. Throughput: 0: 224.1. Samples: 931016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 15:59:20,150][00194] Avg episode reward: [(0, '21.560')] +[2024-09-01 15:59:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3719168. Throughput: 0: 221.4. Samples: 931574. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:59:25,144][00194] Avg episode reward: [(0, '22.354')] +[2024-09-01 15:59:30,136][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3723264. Throughput: 0: 223.9. Samples: 933176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 15:59:30,139][00194] Avg episode reward: [(0, '22.562')] +[2024-09-01 15:59:30,695][03034] Updated weights for policy 0, policy_version 910 (0.1564) +[2024-09-01 15:59:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3731456. Throughput: 0: 230.4. Samples: 934526. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:59:35,139][00194] Avg episode reward: [(0, '22.871')] +[2024-09-01 15:59:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3735552. Throughput: 0: 229.1. Samples: 935338. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:59:40,142][00194] Avg episode reward: [(0, '23.149')] +[2024-09-01 15:59:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3739648. Throughput: 0: 225.9. Samples: 936532. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:59:45,146][00194] Avg episode reward: [(0, '23.944')] +[2024-09-01 15:59:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3743744. Throughput: 0: 235.4. Samples: 937984. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:59:50,139][00194] Avg episode reward: [(0, '24.146')] +[2024-09-01 15:59:55,137][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3747840. Throughput: 0: 230.7. Samples: 938624. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 15:59:55,144][00194] Avg episode reward: [(0, '24.034')] +[2024-09-01 16:00:00,138][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3751936. Throughput: 0: 223.5. Samples: 939794. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:00:00,140][00194] Avg episode reward: [(0, '24.205')] +[2024-09-01 16:00:05,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3756032. Throughput: 0: 228.6. Samples: 941300. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:00:05,141][00194] Avg episode reward: [(0, '24.291')] +[2024-09-01 16:00:10,136][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3760128. Throughput: 0: 226.6. Samples: 941772. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:00:10,147][00194] Avg episode reward: [(0, '24.125')] +[2024-09-01 16:00:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3764224. Throughput: 0: 227.2. Samples: 943402. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:00:15,139][00194] Avg episode reward: [(0, '23.769')] +[2024-09-01 16:00:16,114][03034] Updated weights for policy 0, policy_version 920 (0.1540) +[2024-09-01 16:00:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3768320. Throughput: 0: 223.6. Samples: 944586. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:00:20,148][00194] Avg episode reward: [(0, '24.086')] +[2024-09-01 16:00:21,046][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000921_3772416.pth... +[2024-09-01 16:00:21,155][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000868_3555328.pth +[2024-09-01 16:00:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3776512. Throughput: 0: 221.5. Samples: 945304. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:00:25,139][00194] Avg episode reward: [(0, '23.475')] +[2024-09-01 16:00:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3780608. Throughput: 0: 225.3. Samples: 946672. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:00:30,141][00194] Avg episode reward: [(0, '24.147')] +[2024-09-01 16:00:35,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3784704. Throughput: 0: 218.6. Samples: 947822. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:00:35,144][00194] Avg episode reward: [(0, '24.289')] +[2024-09-01 16:00:40,141][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3788800. Throughput: 0: 219.0. Samples: 948482. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:00:40,151][00194] Avg episode reward: [(0, '24.021')] +[2024-09-01 16:00:45,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3792896. Throughput: 0: 230.3. Samples: 950156. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:00:45,150][00194] Avg episode reward: [(0, '23.460')] +[2024-09-01 16:00:50,137][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3796992. Throughput: 0: 231.0. Samples: 951696. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:00:50,144][00194] Avg episode reward: [(0, '23.930')] +[2024-09-01 16:00:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3801088. Throughput: 0: 227.1. Samples: 951992. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:00:55,138][00194] Avg episode reward: [(0, '24.220')] +[2024-09-01 16:01:00,136][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3805184. Throughput: 0: 226.5. Samples: 953594. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:01:00,148][00194] Avg episode reward: [(0, '23.937')] +[2024-09-01 16:01:00,770][03034] Updated weights for policy 0, policy_version 930 (0.0685) +[2024-09-01 16:01:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3813376. Throughput: 0: 230.3. Samples: 954950. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:05,143][00194] Avg episode reward: [(0, '24.109')] +[2024-09-01 16:01:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3813376. Throughput: 0: 231.3. Samples: 955712. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:10,143][00194] Avg episode reward: [(0, '24.109')] +[2024-09-01 16:01:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3821568. Throughput: 0: 227.4. Samples: 956906. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:01:15,138][00194] Avg episode reward: [(0, '23.607')] +[2024-09-01 16:01:20,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3825664. Throughput: 0: 234.9. Samples: 958392. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:01:20,139][00194] Avg episode reward: [(0, '22.258')] +[2024-09-01 16:01:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3829760. Throughput: 0: 235.4. Samples: 959072. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:01:25,140][00194] Avg episode reward: [(0, '22.667')] +[2024-09-01 16:01:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3833856. Throughput: 0: 222.8. Samples: 960184. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:30,145][00194] Avg episode reward: [(0, '23.086')] +[2024-09-01 16:01:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3837952. Throughput: 0: 223.7. Samples: 961762. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:35,140][00194] Avg episode reward: [(0, '23.359')] +[2024-09-01 16:01:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3842048. Throughput: 0: 233.4. Samples: 962496. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:40,146][00194] Avg episode reward: [(0, '22.653')] +[2024-09-01 16:01:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3846144. Throughput: 0: 230.8. Samples: 963980. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:45,139][00194] Avg episode reward: [(0, '22.871')] +[2024-09-01 16:01:45,943][03034] Updated weights for policy 0, policy_version 940 (0.0555) +[2024-09-01 16:01:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3850240. Throughput: 0: 227.9. Samples: 965206. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:01:50,146][00194] Avg episode reward: [(0, '22.790')] +[2024-09-01 16:01:55,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3858432. Throughput: 0: 228.5. Samples: 965996. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:01:55,138][00194] Avg episode reward: [(0, '22.756')] +[2024-09-01 16:02:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3862528. Throughput: 0: 231.2. Samples: 967312. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:02:00,140][00194] Avg episode reward: [(0, '22.462')] +[2024-09-01 16:02:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3866624. Throughput: 0: 224.1. Samples: 968476. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:05,139][00194] Avg episode reward: [(0, '22.907')] +[2024-09-01 16:02:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3870720. Throughput: 0: 224.6. Samples: 969178. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:10,145][00194] Avg episode reward: [(0, '22.353')] +[2024-09-01 16:02:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3874816. Throughput: 0: 236.0. Samples: 970806. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:15,144][00194] Avg episode reward: [(0, '23.123')] +[2024-09-01 16:02:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3878912. Throughput: 0: 235.2. Samples: 972346. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:20,139][00194] Avg episode reward: [(0, '22.516')] +[2024-09-01 16:02:21,425][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000948_3883008.pth... +[2024-09-01 16:02:21,533][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000894_3661824.pth +[2024-09-01 16:02:25,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3883008. Throughput: 0: 226.1. Samples: 972670. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:02:25,139][00194] Avg episode reward: [(0, '22.653')] +[2024-09-01 16:02:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3887104. Throughput: 0: 229.3. Samples: 974300. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:02:30,145][00194] Avg episode reward: [(0, '22.519')] +[2024-09-01 16:02:30,804][03034] Updated weights for policy 0, policy_version 950 (0.1483) +[2024-09-01 16:02:33,248][03021] Signal inference workers to stop experience collection... (950 times) +[2024-09-01 16:02:33,319][03034] InferenceWorker_p0-w0: stopping experience collection (950 times) +[2024-09-01 16:02:34,217][03021] Signal inference workers to resume experience collection... (950 times) +[2024-09-01 16:02:34,219][03034] InferenceWorker_p0-w0: resuming experience collection (950 times) +[2024-09-01 16:02:35,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3895296. Throughput: 0: 229.2. Samples: 975518. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:35,143][00194] Avg episode reward: [(0, '23.093')] +[2024-09-01 16:02:40,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3899392. Throughput: 0: 226.3. Samples: 976180. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:02:40,141][00194] Avg episode reward: [(0, '23.167')] +[2024-09-01 16:02:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3903488. Throughput: 0: 227.1. Samples: 977530. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:02:45,139][00194] Avg episode reward: [(0, '23.087')] +[2024-09-01 16:02:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3907584. Throughput: 0: 229.9. Samples: 978822. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:50,139][00194] Avg episode reward: [(0, '23.175')] +[2024-09-01 16:02:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3911680. Throughput: 0: 230.1. Samples: 979534. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:02:55,139][00194] Avg episode reward: [(0, '23.824')] +[2024-09-01 16:03:00,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3915776. Throughput: 0: 219.8. Samples: 980698. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:03:00,139][00194] Avg episode reward: [(0, '24.015')] +[2024-09-01 16:03:05,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3919872. Throughput: 0: 220.0. Samples: 982244. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:03:05,139][00194] Avg episode reward: [(0, '23.490')] +[2024-09-01 16:03:10,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3923968. Throughput: 0: 224.1. Samples: 982756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:10,149][00194] Avg episode reward: [(0, '23.447')] +[2024-09-01 16:03:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3928064. Throughput: 0: 225.7. Samples: 984458. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:15,140][00194] Avg episode reward: [(0, '23.740')] +[2024-09-01 16:03:16,295][03034] Updated weights for policy 0, policy_version 960 (0.2640) +[2024-09-01 16:03:20,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3932160. Throughput: 0: 222.2. Samples: 985518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:03:20,148][00194] Avg episode reward: [(0, '24.262')] +[2024-09-01 16:03:25,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3940352. Throughput: 0: 230.0. Samples: 986528. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:25,147][00194] Avg episode reward: [(0, '24.329')] +[2024-09-01 16:03:30,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3944448. Throughput: 0: 226.9. Samples: 987742. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:30,143][00194] Avg episode reward: [(0, '24.238')] +[2024-09-01 16:03:35,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3948544. Throughput: 0: 225.1. Samples: 988952. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:35,143][00194] Avg episode reward: [(0, '24.320')] +[2024-09-01 16:03:40,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3952640. Throughput: 0: 226.5. Samples: 989726. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:40,138][00194] Avg episode reward: [(0, '24.216')] +[2024-09-01 16:03:45,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 3956736. Throughput: 0: 235.0. Samples: 991274. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:03:45,150][00194] Avg episode reward: [(0, '25.185')] +[2024-09-01 16:03:50,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3960832. Throughput: 0: 234.0. Samples: 992774. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:03:50,140][00194] Avg episode reward: [(0, '25.249')] +[2024-09-01 16:03:51,110][03021] Saving new best policy, reward=25.185! +[2024-09-01 16:03:55,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3964928. Throughput: 0: 230.9. Samples: 993148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:03:55,142][00194] Avg episode reward: [(0, '25.319')] +[2024-09-01 16:03:56,231][03021] Saving new best policy, reward=25.249! +[2024-09-01 16:03:59,995][03021] Saving new best policy, reward=25.319! +[2024-09-01 16:04:00,007][03034] Updated weights for policy 0, policy_version 970 (0.1020) +[2024-09-01 16:04:00,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3973120. Throughput: 0: 229.5. Samples: 994784. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:04:00,138][00194] Avg episode reward: [(0, '25.156')] +[2024-09-01 16:04:05,136][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3977216. Throughput: 0: 230.3. Samples: 995880. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:04:05,146][00194] Avg episode reward: [(0, '25.008')] +[2024-09-01 16:04:10,139][00194] Fps is (10 sec: 818.9, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3981312. Throughput: 0: 224.2. Samples: 996620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:04:10,147][00194] Avg episode reward: [(0, '25.910')] +[2024-09-01 16:04:14,339][03021] Saving new best policy, reward=25.910! +[2024-09-01 16:04:15,136][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3985408. Throughput: 0: 224.9. Samples: 997864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:04:15,146][00194] Avg episode reward: [(0, '25.117')] +[2024-09-01 16:04:18,206][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000974_3989504.pth... +[2024-09-01 16:04:18,320][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000921_3772416.pth +[2024-09-01 16:04:20,136][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 3989504. Throughput: 0: 235.2. Samples: 999536. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:04:20,143][00194] Avg episode reward: [(0, '24.987')] +[2024-09-01 16:04:25,139][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 3993600. Throughput: 0: 233.1. Samples: 1000218. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:04:25,149][00194] Avg episode reward: [(0, '25.077')] +[2024-09-01 16:04:30,136][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 3997696. Throughput: 0: 224.0. Samples: 1001352. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:04:30,141][00194] Avg episode reward: [(0, '24.695')] +[2024-09-01 16:04:35,136][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4001792. Throughput: 0: 226.2. Samples: 1002954. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:04:35,139][00194] Avg episode reward: [(0, '24.647')] +[2024-09-01 16:04:36,205][03021] Stopping Batcher_0... +[2024-09-01 16:04:36,207][03021] Loop batcher_evt_loop terminating... +[2024-09-01 16:04:36,207][00194] Component Batcher_0 stopped! +[2024-09-01 16:04:36,419][03034] Weights refcount: 2 0 +[2024-09-01 16:04:36,423][00194] Component InferenceWorker_p0-w0 stopped! +[2024-09-01 16:04:36,429][03034] Stopping InferenceWorker_p0-w0... +[2024-09-01 16:04:36,430][03034] Loop inference_proc0-0_evt_loop terminating... +[2024-09-01 16:04:36,780][00194] Component RolloutWorker_w2 stopped! +[2024-09-01 16:04:36,788][03037] Stopping RolloutWorker_w2... +[2024-09-01 16:04:36,814][03037] Loop rollout_proc2_evt_loop terminating... +[2024-09-01 16:04:36,840][00194] Component RolloutWorker_w1 stopped! +[2024-09-01 16:04:36,857][00194] Component RolloutWorker_w4 stopped! +[2024-09-01 16:04:36,865][00194] Component RolloutWorker_w3 stopped! +[2024-09-01 16:04:36,841][03036] Stopping RolloutWorker_w1... +[2024-09-01 16:04:36,887][03036] Loop rollout_proc1_evt_loop terminating... +[2024-09-01 16:04:36,890][00194] Component RolloutWorker_w6 stopped! +[2024-09-01 16:04:36,911][03040] Stopping RolloutWorker_w5... +[2024-09-01 16:04:36,911][00194] Component RolloutWorker_w5 stopped! +[2024-09-01 16:04:36,926][00194] Component RolloutWorker_w7 stopped! +[2024-09-01 16:04:36,871][03039] Stopping RolloutWorker_w4... +[2024-09-01 16:04:36,888][03038] Stopping RolloutWorker_w3... +[2024-09-01 16:04:36,946][00194] Component RolloutWorker_w0 stopped! +[2024-09-01 16:04:36,898][03041] Stopping RolloutWorker_w6... +[2024-09-01 16:04:36,954][03040] Loop rollout_proc5_evt_loop terminating... +[2024-09-01 16:04:36,962][03038] Loop rollout_proc3_evt_loop terminating... +[2024-09-01 16:04:36,952][03035] Stopping RolloutWorker_w0... +[2024-09-01 16:04:36,954][03039] Loop rollout_proc4_evt_loop terminating... +[2024-09-01 16:04:36,964][03041] Loop rollout_proc6_evt_loop terminating... +[2024-09-01 16:04:36,945][03042] Stopping RolloutWorker_w7... +[2024-09-01 16:04:36,992][03042] Loop rollout_proc7_evt_loop terminating... +[2024-09-01 16:04:36,994][03035] Loop rollout_proc0_evt_loop terminating... +[2024-09-01 16:04:41,411][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000979_4009984.pth... +[2024-09-01 16:04:41,524][03021] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000948_3883008.pth +[2024-09-01 16:04:41,548][03021] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000979_4009984.pth... +[2024-09-01 16:04:41,741][03021] Stopping LearnerWorker_p0... +[2024-09-01 16:04:41,741][03021] Loop learner_proc0_evt_loop terminating... +[2024-09-01 16:04:41,741][00194] Component LearnerWorker_p0 stopped! +[2024-09-01 16:04:41,745][00194] Waiting for process learner_proc0 to stop... +[2024-09-01 16:04:43,065][00194] Waiting for process inference_proc0-0 to join... +[2024-09-01 16:04:43,073][00194] Waiting for process rollout_proc0 to join... +[2024-09-01 16:04:44,240][00194] Waiting for process rollout_proc1 to join... +[2024-09-01 16:04:44,250][00194] Waiting for process rollout_proc2 to join... +[2024-09-01 16:04:44,278][00194] Waiting for process rollout_proc3 to join... +[2024-09-01 16:04:44,286][00194] Waiting for process rollout_proc4 to join... +[2024-09-01 16:04:44,297][00194] Waiting for process rollout_proc5 to join... +[2024-09-01 16:04:44,301][00194] Waiting for process rollout_proc6 to join... +[2024-09-01 16:04:44,309][00194] Waiting for process rollout_proc7 to join... +[2024-09-01 16:04:44,314][00194] Batcher 0 profile tree view: +batching: 20.5903, releasing_batches: 0.2968 +[2024-09-01 16:04:44,318][00194] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0001 - wait_policy_total: 68.0720 -update_model: 45.9865 - weight_update: 0.1255 -one_step: 0.0551 - handle_policy_step: 1285.7323 - deserialize: 18.0612, stack: 3.3590, obs_to_device_normalize: 133.9782, forward: 1044.8374, send_messages: 26.8132 - prepare_outputs: 30.6331 - to_cpu: 3.5112 -[2024-09-01 07:02:15,609][00307] Learner 0 profile tree view: -misc: 0.0023, prepare_batch: 444.6382 -train: 1309.7648 - epoch_init: 0.0050, minibatch_init: 0.0040, losses_postprocess: 0.0519, kl_divergence: 0.2532, after_optimizer: 0.9695 - calculate_losses: 543.4052 - losses_init: 0.0017, forward_head: 475.6739, bptt_initial: 1.9416, tail: 1.2634, advantages_returns: 0.1093, losses: 0.6511 - bptt: 63.5370 - bptt_forward_core: 63.1447 - update: 764.8126 - clip: 1.5666 -[2024-09-01 07:02:15,612][00307] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.3756, enqueue_policy_requests: 43.7998, env_step: 958.1747, overhead: 21.8893, complete_rollouts: 9.7052 -save_policy_outputs: 37.9437 - split_output_tensors: 12.8258 -[2024-09-01 07:02:15,614][00307] RolloutWorker_w7 profile tree view: -wait_for_trajectories: 0.4284, enqueue_policy_requests: 54.3901, env_step: 923.3749, overhead: 19.9329, complete_rollouts: 7.7933 -save_policy_outputs: 34.3538 - split_output_tensors: 11.3254 -[2024-09-01 07:02:15,617][00307] Loop Runner_EvtLoop terminating... -[2024-09-01 07:02:15,620][00307] Runner profile tree view: -main_loop: 1800.7612 -[2024-09-01 07:02:15,623][00307] Collected {0: 1540096}, FPS: 855.2 -[2024-09-01 07:02:15,692][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 07:02:15,695][00307] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-01 07:02:15,697][00307] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-01 07:02:15,700][00307] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-01 07:02:15,702][00307] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 07:02:15,704][00307] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-01 07:02:15,706][00307] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 07:02:15,707][00307] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-01 07:02:15,708][00307] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-09-01 07:02:15,709][00307] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-09-01 07:02:15,711][00307] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-01 07:02:15,712][00307] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-01 07:02:15,713][00307] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-01 07:02:15,715][00307] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-01 07:02:15,716][00307] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-01 07:02:15,754][00307] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:02:15,757][00307] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 07:02:15,762][00307] RunningMeanStd input shape: (1,) -[2024-09-01 07:02:15,796][00307] ConvEncoder: input_channels=3 -[2024-09-01 07:02:15,986][00307] Conv encoder output size: 512 -[2024-09-01 07:02:15,988][00307] Policy head output size: 512 -[2024-09-01 07:02:16,014][00307] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:02:16,694][00307] Num frames 100... -[2024-09-01 07:02:16,886][00307] Num frames 200... -[2024-09-01 07:02:17,098][00307] Num frames 300... -[2024-09-01 07:02:17,325][00307] Num frames 400... -[2024-09-01 07:02:17,482][00307] Avg episode rewards: #0: 7.480, true rewards: #0: 4.480 -[2024-09-01 07:02:17,487][00307] Avg episode reward: 7.480, avg true_objective: 4.480 -[2024-09-01 07:02:17,596][00307] Num frames 500... -[2024-09-01 07:02:17,790][00307] Num frames 600... -[2024-09-01 07:02:17,979][00307] Num frames 700... -[2024-09-01 07:02:18,194][00307] Num frames 800... -[2024-09-01 07:02:18,414][00307] Num frames 900... -[2024-09-01 07:02:18,610][00307] Num frames 1000... -[2024-09-01 07:02:18,809][00307] Num frames 1100... -[2024-09-01 07:02:18,998][00307] Num frames 1200... -[2024-09-01 07:02:19,090][00307] Avg episode rewards: #0: 10.580, true rewards: #0: 6.080 -[2024-09-01 07:02:19,092][00307] Avg episode reward: 10.580, avg true_objective: 6.080 -[2024-09-01 07:02:19,251][00307] Num frames 1300... -[2024-09-01 07:02:19,453][00307] Num frames 1400... -[2024-09-01 07:02:19,646][00307] Num frames 1500... -[2024-09-01 07:02:19,843][00307] Num frames 1600... -[2024-09-01 07:02:20,039][00307] Num frames 1700... -[2024-09-01 07:02:20,238][00307] Num frames 1800... -[2024-09-01 07:02:20,434][00307] Num frames 1900... -[2024-09-01 07:02:20,627][00307] Num frames 2000... -[2024-09-01 07:02:20,829][00307] Num frames 2100... -[2024-09-01 07:02:21,030][00307] Num frames 2200... -[2024-09-01 07:02:21,239][00307] Num frames 2300... -[2024-09-01 07:02:21,430][00307] Avg episode rewards: #0: 14.227, true rewards: #0: 7.893 -[2024-09-01 07:02:21,432][00307] Avg episode reward: 14.227, avg true_objective: 7.893 -[2024-09-01 07:02:21,502][00307] Num frames 2400... -[2024-09-01 07:02:21,693][00307] Num frames 2500... -[2024-09-01 07:02:21,892][00307] Num frames 2600... -[2024-09-01 07:02:22,088][00307] Num frames 2700... -[2024-09-01 07:02:22,180][00307] Avg episode rewards: #0: 11.788, true rewards: #0: 6.787 -[2024-09-01 07:02:22,182][00307] Avg episode reward: 11.788, avg true_objective: 6.787 -[2024-09-01 07:02:22,350][00307] Num frames 2800... -[2024-09-01 07:02:22,569][00307] Num frames 2900... -[2024-09-01 07:02:22,845][00307] Num frames 3000... -[2024-09-01 07:02:22,919][00307] Avg episode rewards: #0: 10.406, true rewards: #0: 6.006 -[2024-09-01 07:02:22,922][00307] Avg episode reward: 10.406, avg true_objective: 6.006 -[2024-09-01 07:02:23,197][00307] Num frames 3100... -[2024-09-01 07:02:23,478][00307] Num frames 3200... -[2024-09-01 07:02:23,744][00307] Num frames 3300... -[2024-09-01 07:02:24,006][00307] Num frames 3400... -[2024-09-01 07:02:24,298][00307] Num frames 3500... -[2024-09-01 07:02:24,567][00307] Num frames 3600... -[2024-09-01 07:02:24,838][00307] Num frames 3700... -[2024-09-01 07:02:25,114][00307] Num frames 3800... -[2024-09-01 07:02:25,403][00307] Num frames 3900... -[2024-09-01 07:02:25,696][00307] Num frames 4000... -[2024-09-01 07:02:25,904][00307] Num frames 4100... -[2024-09-01 07:02:26,107][00307] Num frames 4200... -[2024-09-01 07:02:26,304][00307] Num frames 4300... -[2024-09-01 07:02:26,391][00307] Avg episode rewards: #0: 13.025, true rewards: #0: 7.192 -[2024-09-01 07:02:26,393][00307] Avg episode reward: 13.025, avg true_objective: 7.192 -[2024-09-01 07:02:26,557][00307] Num frames 4400... -[2024-09-01 07:02:26,751][00307] Num frames 4500... -[2024-09-01 07:02:26,939][00307] Num frames 4600... -[2024-09-01 07:02:27,144][00307] Num frames 4700... -[2024-09-01 07:02:27,346][00307] Num frames 4800... -[2024-09-01 07:02:27,599][00307] Avg episode rewards: #0: 12.559, true rewards: #0: 6.987 -[2024-09-01 07:02:27,602][00307] Avg episode reward: 12.559, avg true_objective: 6.987 -[2024-09-01 07:02:27,624][00307] Num frames 4900... -[2024-09-01 07:02:27,810][00307] Num frames 5000... -[2024-09-01 07:02:28,004][00307] Num frames 5100... -[2024-09-01 07:02:28,197][00307] Num frames 5200... -[2024-09-01 07:02:28,405][00307] Num frames 5300... -[2024-09-01 07:02:28,607][00307] Num frames 5400... -[2024-09-01 07:02:28,670][00307] Avg episode rewards: #0: 12.001, true rewards: #0: 6.751 -[2024-09-01 07:02:28,673][00307] Avg episode reward: 12.001, avg true_objective: 6.751 -[2024-09-01 07:02:28,894][00307] Num frames 5500... -[2024-09-01 07:02:29,085][00307] Num frames 5600... -[2024-09-01 07:02:29,277][00307] Num frames 5700... -[2024-09-01 07:02:29,481][00307] Num frames 5800... -[2024-09-01 07:02:29,674][00307] Num frames 5900... -[2024-09-01 07:02:29,878][00307] Avg episode rewards: #0: 11.755, true rewards: #0: 6.643 -[2024-09-01 07:02:29,881][00307] Avg episode reward: 11.755, avg true_objective: 6.643 -[2024-09-01 07:02:29,924][00307] Num frames 6000... -[2024-09-01 07:02:30,122][00307] Num frames 6100... -[2024-09-01 07:02:30,319][00307] Num frames 6200... -[2024-09-01 07:02:30,535][00307] Num frames 6300... -[2024-09-01 07:02:30,713][00307] Avg episode rewards: #0: 10.963, true rewards: #0: 6.363 -[2024-09-01 07:02:30,715][00307] Avg episode reward: 10.963, avg true_objective: 6.363 -[2024-09-01 07:03:14,376][00307] Replay video saved to /content/train_dir/default_experiment/replay.mp4! -[2024-09-01 07:03:14,432][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 07:03:14,434][00307] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-01 07:03:14,437][00307] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-01 07:03:14,440][00307] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-01 07:03:14,444][00307] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 07:03:14,447][00307] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-01 07:03:14,449][00307] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! -[2024-09-01 07:03:14,453][00307] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-01 07:03:14,454][00307] Adding new argument 'push_to_hub'=True that is not in the saved config file! -[2024-09-01 07:03:14,456][00307] Adding new argument 'hf_repository'='jarski/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! -[2024-09-01 07:03:14,457][00307] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-01 07:03:14,462][00307] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-01 07:03:14,464][00307] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-01 07:03:14,466][00307] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-01 07:03:14,470][00307] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-01 07:03:14,484][00307] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 07:03:14,487][00307] RunningMeanStd input shape: (1,) -[2024-09-01 07:03:14,505][00307] ConvEncoder: input_channels=3 -[2024-09-01 07:03:14,566][00307] Conv encoder output size: 512 -[2024-09-01 07:03:14,568][00307] Policy head output size: 512 -[2024-09-01 07:03:14,596][00307] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:03:15,146][00307] Num frames 100... -[2024-09-01 07:03:15,357][00307] Num frames 200... -[2024-09-01 07:03:15,546][00307] Num frames 300... -[2024-09-01 07:03:15,759][00307] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840 -[2024-09-01 07:03:15,761][00307] Avg episode reward: 3.840, avg true_objective: 3.840 -[2024-09-01 07:03:15,797][00307] Num frames 400... -[2024-09-01 07:03:15,991][00307] Num frames 500... -[2024-09-01 07:03:16,182][00307] Num frames 600... -[2024-09-01 07:03:16,333][00307] Avg episode rewards: #0: 3.200, true rewards: #0: 3.200 -[2024-09-01 07:03:16,336][00307] Avg episode reward: 3.200, avg true_objective: 3.200 -[2024-09-01 07:03:16,466][00307] Num frames 700... -[2024-09-01 07:03:16,732][00307] Num frames 800... -[2024-09-01 07:03:16,994][00307] Num frames 900... -[2024-09-01 07:03:17,242][00307] Num frames 1000... -[2024-09-01 07:03:17,510][00307] Num frames 1100... -[2024-09-01 07:03:17,777][00307] Avg episode rewards: #0: 4.613, true rewards: #0: 3.947 -[2024-09-01 07:03:17,782][00307] Avg episode reward: 4.613, avg true_objective: 3.947 -[2024-09-01 07:03:17,830][00307] Num frames 1200... -[2024-09-01 07:03:18,086][00307] Num frames 1300... -[2024-09-01 07:03:18,360][00307] Num frames 1400... -[2024-09-01 07:03:18,624][00307] Num frames 1500... -[2024-09-01 07:03:18,890][00307] Num frames 1600... -[2024-09-01 07:03:19,161][00307] Num frames 1700... -[2024-09-01 07:03:19,457][00307] Num frames 1800... -[2024-09-01 07:03:19,703][00307] Num frames 1900... -[2024-09-01 07:03:19,800][00307] Avg episode rewards: #0: 7.050, true rewards: #0: 4.800 -[2024-09-01 07:03:19,802][00307] Avg episode reward: 7.050, avg true_objective: 4.800 -[2024-09-01 07:03:19,954][00307] Num frames 2000... -[2024-09-01 07:03:20,140][00307] Num frames 2100... -[2024-09-01 07:03:20,322][00307] Num frames 2200... -[2024-09-01 07:03:20,525][00307] Num frames 2300... -[2024-09-01 07:03:20,716][00307] Num frames 2400... -[2024-09-01 07:03:20,904][00307] Num frames 2500... -[2024-09-01 07:03:21,097][00307] Num frames 2600... -[2024-09-01 07:03:21,287][00307] Num frames 2700... -[2024-09-01 07:03:21,507][00307] Avg episode rewards: #0: 9.168, true rewards: #0: 5.568 -[2024-09-01 07:03:21,510][00307] Avg episode reward: 9.168, avg true_objective: 5.568 -[2024-09-01 07:03:21,544][00307] Num frames 2800... -[2024-09-01 07:03:21,729][00307] Num frames 2900... -[2024-09-01 07:03:21,915][00307] Num frames 3000... -[2024-09-01 07:03:22,098][00307] Num frames 3100... -[2024-09-01 07:03:22,286][00307] Num frames 3200... -[2024-09-01 07:03:22,479][00307] Num frames 3300... -[2024-09-01 07:03:22,596][00307] Avg episode rewards: #0: 8.880, true rewards: #0: 5.547 -[2024-09-01 07:03:22,599][00307] Avg episode reward: 8.880, avg true_objective: 5.547 -[2024-09-01 07:03:22,735][00307] Num frames 3400... -[2024-09-01 07:03:22,925][00307] Num frames 3500... -[2024-09-01 07:03:23,109][00307] Num frames 3600... -[2024-09-01 07:03:23,300][00307] Num frames 3700... -[2024-09-01 07:03:23,464][00307] Avg episode rewards: #0: 8.366, true rewards: #0: 5.366 -[2024-09-01 07:03:23,466][00307] Avg episode reward: 8.366, avg true_objective: 5.366 -[2024-09-01 07:03:23,561][00307] Num frames 3800... -[2024-09-01 07:03:23,741][00307] Num frames 3900... -[2024-09-01 07:03:23,926][00307] Num frames 4000... -[2024-09-01 07:03:24,118][00307] Num frames 4100... -[2024-09-01 07:03:24,308][00307] Num frames 4200... -[2024-09-01 07:03:24,494][00307] Num frames 4300... -[2024-09-01 07:03:24,732][00307] Avg episode rewards: #0: 8.620, true rewards: #0: 5.495 -[2024-09-01 07:03:24,735][00307] Avg episode reward: 8.620, avg true_objective: 5.495 -[2024-09-01 07:03:24,745][00307] Num frames 4400... -[2024-09-01 07:03:24,932][00307] Num frames 4500... -[2024-09-01 07:03:25,112][00307] Num frames 4600... -[2024-09-01 07:03:25,300][00307] Num frames 4700... -[2024-09-01 07:03:25,486][00307] Num frames 4800... -[2024-09-01 07:03:25,620][00307] Avg episode rewards: #0: 8.271, true rewards: #0: 5.382 -[2024-09-01 07:03:25,623][00307] Avg episode reward: 8.271, avg true_objective: 5.382 -[2024-09-01 07:03:25,726][00307] Num frames 4900... -[2024-09-01 07:03:25,906][00307] Num frames 5000... -[2024-09-01 07:03:26,087][00307] Num frames 5100... -[2024-09-01 07:03:26,268][00307] Num frames 5200... -[2024-09-01 07:03:26,463][00307] Num frames 5300... -[2024-09-01 07:03:26,662][00307] Num frames 5400... -[2024-09-01 07:03:26,847][00307] Num frames 5500... -[2024-09-01 07:03:27,038][00307] Num frames 5600... -[2024-09-01 07:03:27,214][00307] Num frames 5700... -[2024-09-01 07:03:27,285][00307] Avg episode rewards: #0: 8.908, true rewards: #0: 5.708 -[2024-09-01 07:03:27,288][00307] Avg episode reward: 8.908, avg true_objective: 5.708 -[2024-09-01 07:04:05,352][00307] Replay video saved to /content/train_dir/default_experiment/replay.mp4! -[2024-09-01 07:04:20,071][00307] The model has been pushed to https://huggingface.co/jarski/rl_course_vizdoom_health_gathering_supreme -[2024-09-01 07:06:22,936][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 07:06:22,939][00307] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-01 07:06:22,942][00307] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-01 07:06:22,943][00307] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-01 07:06:22,947][00307] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 07:06:22,949][00307] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-01 07:06:22,952][00307] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! -[2024-09-01 07:06:22,953][00307] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-01 07:06:22,956][00307] Adding new argument 'push_to_hub'=True that is not in the saved config file! -[2024-09-01 07:06:22,957][00307] Adding new argument 'hf_repository'='jarski/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! -[2024-09-01 07:06:22,960][00307] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-01 07:06:22,961][00307] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-01 07:06:22,962][00307] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-01 07:06:22,964][00307] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-01 07:06:22,965][00307] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-01 07:06:22,984][00307] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 07:06:22,986][00307] RunningMeanStd input shape: (1,) -[2024-09-01 07:06:23,002][00307] ConvEncoder: input_channels=3 -[2024-09-01 07:06:23,049][00307] Conv encoder output size: 512 -[2024-09-01 07:06:23,051][00307] Policy head output size: 512 -[2024-09-01 07:06:23,070][00307] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:06:23,622][00307] Num frames 100... -[2024-09-01 07:06:23,804][00307] Num frames 200... -[2024-09-01 07:06:24,003][00307] Num frames 300... -[2024-09-01 07:06:24,197][00307] Num frames 400... -[2024-09-01 07:06:24,396][00307] Num frames 500... -[2024-09-01 07:06:24,594][00307] Num frames 600... -[2024-09-01 07:06:24,781][00307] Num frames 700... -[2024-09-01 07:06:24,962][00307] Num frames 800... -[2024-09-01 07:06:25,149][00307] Num frames 900... -[2024-09-01 07:06:25,257][00307] Avg episode rewards: #0: 16.280, true rewards: #0: 9.280 -[2024-09-01 07:06:25,261][00307] Avg episode reward: 16.280, avg true_objective: 9.280 -[2024-09-01 07:06:25,420][00307] Num frames 1000... -[2024-09-01 07:06:25,606][00307] Num frames 1100... -[2024-09-01 07:06:25,800][00307] Num frames 1200... -[2024-09-01 07:06:25,989][00307] Num frames 1300... -[2024-09-01 07:06:26,183][00307] Num frames 1400... -[2024-09-01 07:06:26,383][00307] Num frames 1500... -[2024-09-01 07:06:26,576][00307] Num frames 1600... -[2024-09-01 07:06:26,692][00307] Avg episode rewards: #0: 14.660, true rewards: #0: 8.160 -[2024-09-01 07:06:26,694][00307] Avg episode reward: 14.660, avg true_objective: 8.160 -[2024-09-01 07:06:26,828][00307] Num frames 1700... -[2024-09-01 07:06:27,016][00307] Num frames 1800... -[2024-09-01 07:06:27,202][00307] Num frames 1900... -[2024-09-01 07:06:27,429][00307] Avg episode rewards: #0: 11.280, true rewards: #0: 6.613 -[2024-09-01 07:06:27,433][00307] Avg episode reward: 11.280, avg true_objective: 6.613 -[2024-09-01 07:06:27,468][00307] Num frames 2000... -[2024-09-01 07:06:27,652][00307] Num frames 2100... -[2024-09-01 07:06:27,843][00307] Num frames 2200... -[2024-09-01 07:06:28,034][00307] Num frames 2300... -[2024-09-01 07:06:28,226][00307] Num frames 2400... -[2024-09-01 07:06:28,439][00307] Num frames 2500... -[2024-09-01 07:06:28,610][00307] Avg episode rewards: #0: 10.900, true rewards: #0: 6.400 -[2024-09-01 07:06:28,612][00307] Avg episode reward: 10.900, avg true_objective: 6.400 -[2024-09-01 07:06:28,689][00307] Num frames 2600... -[2024-09-01 07:06:28,877][00307] Num frames 2700... -[2024-09-01 07:06:29,064][00307] Num frames 2800... -[2024-09-01 07:06:29,258][00307] Num frames 2900... -[2024-09-01 07:06:29,462][00307] Num frames 3000... -[2024-09-01 07:06:29,534][00307] Avg episode rewards: #0: 9.816, true rewards: #0: 6.016 -[2024-09-01 07:06:29,536][00307] Avg episode reward: 9.816, avg true_objective: 6.016 -[2024-09-01 07:06:29,711][00307] Num frames 3100... -[2024-09-01 07:06:29,941][00307] Num frames 3200... -[2024-09-01 07:06:30,126][00307] Num frames 3300... -[2024-09-01 07:06:30,319][00307] Num frames 3400... -[2024-09-01 07:06:30,528][00307] Num frames 3500... -[2024-09-01 07:06:30,682][00307] Avg episode rewards: #0: 9.420, true rewards: #0: 5.920 -[2024-09-01 07:06:30,685][00307] Avg episode reward: 9.420, avg true_objective: 5.920 -[2024-09-01 07:06:30,776][00307] Num frames 3600... -[2024-09-01 07:06:30,970][00307] Num frames 3700... -[2024-09-01 07:06:31,167][00307] Num frames 3800... -[2024-09-01 07:06:31,246][00307] Avg episode rewards: #0: 8.440, true rewards: #0: 5.440 -[2024-09-01 07:06:31,249][00307] Avg episode reward: 8.440, avg true_objective: 5.440 -[2024-09-01 07:06:31,433][00307] Num frames 3900... -[2024-09-01 07:06:31,639][00307] Num frames 4000... -[2024-09-01 07:06:31,828][00307] Num frames 4100... -[2024-09-01 07:06:32,021][00307] Num frames 4200... -[2024-09-01 07:06:32,177][00307] Avg episode rewards: #0: 8.070, true rewards: #0: 5.320 -[2024-09-01 07:06:32,179][00307] Avg episode reward: 8.070, avg true_objective: 5.320 -[2024-09-01 07:06:32,266][00307] Num frames 4300... -[2024-09-01 07:06:32,461][00307] Num frames 4400... -[2024-09-01 07:06:32,655][00307] Num frames 4500... -[2024-09-01 07:06:32,878][00307] Num frames 4600... -[2024-09-01 07:06:33,134][00307] Avg episode rewards: #0: 7.858, true rewards: #0: 5.191 -[2024-09-01 07:06:33,137][00307] Avg episode reward: 7.858, avg true_objective: 5.191 -[2024-09-01 07:06:33,212][00307] Num frames 4700... -[2024-09-01 07:06:33,484][00307] Num frames 4800... -[2024-09-01 07:06:33,752][00307] Num frames 4900... -[2024-09-01 07:06:34,020][00307] Num frames 5000... -[2024-09-01 07:06:34,281][00307] Num frames 5100... -[2024-09-01 07:06:34,394][00307] Avg episode rewards: #0: 7.620, true rewards: #0: 5.120 -[2024-09-01 07:06:34,398][00307] Avg episode reward: 7.620, avg true_objective: 5.120 -[2024-09-01 07:07:08,879][00307] Replay video saved to /content/train_dir/default_experiment/replay.mp4! -[2024-09-01 07:07:12,309][00307] The model has been pushed to https://huggingface.co/jarski/rl_course_vizdoom_health_gathering_supreme -[2024-09-01 07:11:57,046][00307] Environment doom_basic already registered, overwriting... -[2024-09-01 07:11:57,051][00307] Environment doom_two_colors_easy already registered, overwriting... -[2024-09-01 07:11:57,055][00307] Environment doom_two_colors_hard already registered, overwriting... -[2024-09-01 07:11:57,059][00307] Environment doom_dm already registered, overwriting... -[2024-09-01 07:11:57,061][00307] Environment doom_dwango5 already registered, overwriting... -[2024-09-01 07:11:57,065][00307] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-09-01 07:11:57,067][00307] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-09-01 07:11:57,069][00307] Environment doom_my_way_home already registered, overwriting... -[2024-09-01 07:11:57,070][00307] Environment doom_deadly_corridor already registered, overwriting... -[2024-09-01 07:11:57,073][00307] Environment doom_defend_the_center already registered, overwriting... -[2024-09-01 07:11:57,074][00307] Environment doom_defend_the_line already registered, overwriting... -[2024-09-01 07:11:57,076][00307] Environment doom_health_gathering already registered, overwriting... -[2024-09-01 07:11:57,078][00307] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-09-01 07:11:57,079][00307] Environment doom_battle already registered, overwriting... -[2024-09-01 07:11:57,080][00307] Environment doom_battle2 already registered, overwriting... -[2024-09-01 07:11:57,082][00307] Environment doom_duel_bots already registered, overwriting... -[2024-09-01 07:11:57,083][00307] Environment doom_deathmatch_bots already registered, overwriting... -[2024-09-01 07:11:57,084][00307] Environment doom_duel already registered, overwriting... -[2024-09-01 07:11:57,087][00307] Environment doom_deathmatch_full already registered, overwriting... -[2024-09-01 07:11:57,088][00307] Environment doom_benchmark already registered, overwriting... -[2024-09-01 07:11:57,090][00307] register_encoder_factory: -[2024-09-01 07:11:57,124][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 07:11:57,127][00307] Overriding arg 'train_for_env_steps' with value 12000000 passed from command line -[2024-09-01 07:11:57,139][00307] Experiment dir /content/train_dir/default_experiment already exists! -[2024-09-01 07:11:57,143][00307] Resuming existing experiment from /content/train_dir/default_experiment... -[2024-09-01 07:11:57,145][00307] Weights and Biases integration disabled -[2024-09-01 07:11:57,154][00307] Environment var CUDA_VISIBLE_DEVICES is - -[2024-09-01 07:11:59,806][00307] Starting experiment with the following configuration: + wait_policy_total: 44.9605 +update_model: 132.0643 + weight_update: 0.0560 +one_step: 0.0290 + handle_policy_step: 2882.0811 + deserialize: 91.9792, stack: 14.2129, obs_to_device_normalize: 498.6304, forward: 2093.8209, send_messages: 67.3715 + prepare_outputs: 34.0184 + to_cpu: 3.4641 +[2024-09-01 16:04:44,320][00194] Learner 0 profile tree view: +misc: 0.0066, prepare_batch: 1289.7853 +train: 3103.9562 + epoch_init: 0.0091, minibatch_init: 0.0245, losses_postprocess: 0.1424, kl_divergence: 0.4661, after_optimizer: 2.6855 + calculate_losses: 1524.2036 + losses_init: 0.0044, forward_head: 1364.9844, bptt_initial: 4.4690, tail: 3.4347, advantages_returns: 0.2297, losses: 1.4560 + bptt: 149.0615 + bptt_forward_core: 148.1692 + update: 1575.6538 + clip: 3.8090 +[2024-09-01 16:04:44,323][00194] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.6465, enqueue_policy_requests: 55.2539, env_step: 1623.3020, overhead: 40.2221, complete_rollouts: 17.8695 +save_policy_outputs: 40.9623 + split_output_tensors: 12.9830 +[2024-09-01 16:04:44,325][00194] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.7192, enqueue_policy_requests: 54.9433, env_step: 1625.3504, overhead: 40.9641, complete_rollouts: 15.5030 +save_policy_outputs: 41.8531 + split_output_tensors: 13.9784 +[2024-09-01 16:04:44,327][00194] Loop Runner_EvtLoop terminating... +[2024-09-01 16:04:44,329][00194] Runner profile tree view: +main_loop: 4465.4549 +[2024-09-01 16:04:44,331][00194] Collected {0: 4009984}, FPS: 898.0 +[2024-09-01 16:05:41,893][00194] Environment doom_basic already registered, overwriting... +[2024-09-01 16:05:41,897][00194] Environment doom_two_colors_easy already registered, overwriting... +[2024-09-01 16:05:41,898][00194] Environment doom_two_colors_hard already registered, overwriting... +[2024-09-01 16:05:41,901][00194] Environment doom_dm already registered, overwriting... +[2024-09-01 16:05:41,903][00194] Environment doom_dwango5 already registered, overwriting... +[2024-09-01 16:05:41,905][00194] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-09-01 16:05:41,907][00194] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-09-01 16:05:41,908][00194] Environment doom_my_way_home already registered, overwriting... +[2024-09-01 16:05:41,911][00194] Environment doom_deadly_corridor already registered, overwriting... +[2024-09-01 16:05:41,912][00194] Environment doom_defend_the_center already registered, overwriting... +[2024-09-01 16:05:41,914][00194] Environment doom_defend_the_line already registered, overwriting... +[2024-09-01 16:05:41,915][00194] Environment doom_health_gathering already registered, overwriting... +[2024-09-01 16:05:41,917][00194] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-09-01 16:05:41,920][00194] Environment doom_battle already registered, overwriting... +[2024-09-01 16:05:41,922][00194] Environment doom_battle2 already registered, overwriting... +[2024-09-01 16:05:41,924][00194] Environment doom_duel_bots already registered, overwriting... +[2024-09-01 16:05:41,926][00194] Environment doom_deathmatch_bots already registered, overwriting... +[2024-09-01 16:05:41,927][00194] Environment doom_duel already registered, overwriting... +[2024-09-01 16:05:41,928][00194] Environment doom_deathmatch_full already registered, overwriting... +[2024-09-01 16:05:41,930][00194] Environment doom_benchmark already registered, overwriting... +[2024-09-01 16:05:41,931][00194] register_encoder_factory: +[2024-09-01 16:05:41,965][00194] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-01 16:05:41,975][00194] Experiment dir /content/train_dir/default_experiment already exists! +[2024-09-01 16:05:41,976][00194] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-09-01 16:05:41,980][00194] Weights and Biases integration disabled +[2024-09-01 16:05:41,986][00194] Environment var CUDA_VISIBLE_DEVICES is +[2024-09-01 16:05:45,681][00194] Starting experiment with the following configuration: help=False algo=APPO env=doom_health_gathering_supreme @@ -1638,7 +2471,7 @@ stats_avg=100 summaries_use_frameskip=True heartbeat_interval=20 heartbeat_reporting_interval=600 -train_for_env_steps=12000000 +train_for_env_steps=4000000 train_for_seconds=10000000000 save_every_sec=120 keep_checkpoints=2 @@ -1698,49 +2531,59 @@ res_h=72 wide_aspect_ratio=False eval_env_frameskip=1 fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --device=cpu --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'device': 'cpu', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} git_hash=unknown git_repo_name=not a git repository -[2024-09-01 07:11:59,809][00307] Saving configuration to /content/train_dir/default_experiment/config.json... -[2024-09-01 07:11:59,815][00307] Rollout worker 0 uses device cpu -[2024-09-01 07:11:59,817][00307] Rollout worker 1 uses device cpu -[2024-09-01 07:11:59,820][00307] Rollout worker 2 uses device cpu -[2024-09-01 07:11:59,823][00307] Rollout worker 3 uses device cpu -[2024-09-01 07:11:59,825][00307] Rollout worker 4 uses device cpu -[2024-09-01 07:11:59,828][00307] Rollout worker 5 uses device cpu -[2024-09-01 07:11:59,830][00307] Rollout worker 6 uses device cpu -[2024-09-01 07:11:59,837][00307] Rollout worker 7 uses device cpu -[2024-09-01 07:11:59,989][00307] InferenceWorker_p0-w0: min num requests: 2 -[2024-09-01 07:12:00,079][00307] Starting all processes... -[2024-09-01 07:12:00,081][00307] Starting process learner_proc0 -[2024-09-01 07:12:00,141][00307] Starting all processes... -[2024-09-01 07:12:00,153][00307] Starting process inference_proc0-0 -[2024-09-01 07:12:00,154][00307] Starting process rollout_proc0 -[2024-09-01 07:12:00,156][00307] Starting process rollout_proc1 -[2024-09-01 07:12:00,158][00307] Starting process rollout_proc2 -[2024-09-01 07:12:00,158][00307] Starting process rollout_proc3 -[2024-09-01 07:12:00,158][00307] Starting process rollout_proc4 -[2024-09-01 07:12:00,158][00307] Starting process rollout_proc5 -[2024-09-01 07:12:00,159][00307] Starting process rollout_proc6 -[2024-09-01 07:12:00,159][00307] Starting process rollout_proc7 -[2024-09-01 07:12:17,122][16868] Starting seed is not provided -[2024-09-01 07:12:17,124][16868] Initializing actor-critic model on device cpu -[2024-09-01 07:12:17,125][16868] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 07:12:17,127][16868] RunningMeanStd input shape: (1,) -[2024-09-01 07:12:17,247][16888] Worker 5 uses CPU cores [1] -[2024-09-01 07:12:17,371][16868] ConvEncoder: input_channels=3 -[2024-09-01 07:12:17,919][16883] Worker 1 uses CPU cores [1] -[2024-09-01 07:12:17,974][16884] Worker 2 uses CPU cores [0] -[2024-09-01 07:12:18,026][16885] Worker 3 uses CPU cores [1] -[2024-09-01 07:12:18,059][16886] Worker 4 uses CPU cores [0] -[2024-09-01 07:12:18,101][16889] Worker 7 uses CPU cores [1] -[2024-09-01 07:12:18,098][16882] Worker 0 uses CPU cores [0] -[2024-09-01 07:12:18,132][16887] Worker 6 uses CPU cores [0] -[2024-09-01 07:12:18,270][16868] Conv encoder output size: 512 -[2024-09-01 07:12:18,272][16868] Policy head output size: 512 -[2024-09-01 07:12:18,301][16868] Created Actor Critic model with architecture: -[2024-09-01 07:12:18,303][16868] ActorCriticSharedWeights( +[2024-09-01 16:05:45,685][00194] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-09-01 16:05:45,693][00194] Rollout worker 0 uses device cpu +[2024-09-01 16:05:45,697][00194] Rollout worker 1 uses device cpu +[2024-09-01 16:05:45,701][00194] Rollout worker 2 uses device cpu +[2024-09-01 16:05:45,704][00194] Rollout worker 3 uses device cpu +[2024-09-01 16:05:45,706][00194] Rollout worker 4 uses device cpu +[2024-09-01 16:05:45,707][00194] Rollout worker 5 uses device cpu +[2024-09-01 16:05:45,712][00194] Rollout worker 6 uses device cpu +[2024-09-01 16:05:45,715][00194] Rollout worker 7 uses device cpu +[2024-09-01 16:05:45,925][00194] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-01 16:05:45,969][00194] Starting all processes... +[2024-09-01 16:05:45,971][00194] Starting process learner_proc0 +[2024-09-01 16:05:46,019][00194] Starting all processes... +[2024-09-01 16:05:46,027][00194] Starting process inference_proc0-0 +[2024-09-01 16:05:46,028][00194] Starting process rollout_proc0 +[2024-09-01 16:05:46,028][00194] Starting process rollout_proc1 +[2024-09-01 16:05:46,028][00194] Starting process rollout_proc2 +[2024-09-01 16:05:46,029][00194] Starting process rollout_proc3 +[2024-09-01 16:05:46,029][00194] Starting process rollout_proc4 +[2024-09-01 16:05:46,029][00194] Starting process rollout_proc5 +[2024-09-01 16:05:46,038][00194] Starting process rollout_proc7 +[2024-09-01 16:05:46,038][00194] Starting process rollout_proc6 +[2024-09-01 16:06:06,170][25505] Starting seed is not provided +[2024-09-01 16:06:06,170][25505] Initializing actor-critic model on device cpu +[2024-09-01 16:06:06,171][25505] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 16:06:06,174][25505] RunningMeanStd input shape: (1,) +[2024-09-01 16:06:06,180][00194] Heartbeat connected on Batcher_0 +[2024-09-01 16:06:06,359][25505] ConvEncoder: input_channels=3 +[2024-09-01 16:06:06,444][25520] Worker 1 uses CPU cores [1] +[2024-09-01 16:06:06,584][25523] Worker 4 uses CPU cores [0] +[2024-09-01 16:06:06,611][25524] Worker 5 uses CPU cores [1] +[2024-09-01 16:06:06,653][00194] Heartbeat connected on RolloutWorker_w1 +[2024-09-01 16:06:06,804][00194] Heartbeat connected on RolloutWorker_w4 +[2024-09-01 16:06:06,850][00194] Heartbeat connected on RolloutWorker_w5 +[2024-09-01 16:06:06,868][25522] Worker 3 uses CPU cores [1] +[2024-09-01 16:06:06,906][25518] Worker 0 uses CPU cores [0] +[2024-09-01 16:06:06,916][25525] Worker 6 uses CPU cores [0] +[2024-09-01 16:06:06,920][00194] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-01 16:06:06,977][00194] Heartbeat connected on RolloutWorker_w0 +[2024-09-01 16:06:06,987][00194] Heartbeat connected on RolloutWorker_w6 +[2024-09-01 16:06:06,993][00194] Heartbeat connected on RolloutWorker_w3 +[2024-09-01 16:06:07,002][25521] Worker 2 uses CPU cores [0] +[2024-09-01 16:06:07,014][00194] Heartbeat connected on RolloutWorker_w2 +[2024-09-01 16:06:07,021][25526] Worker 7 uses CPU cores [1] +[2024-09-01 16:06:07,032][00194] Heartbeat connected on RolloutWorker_w7 +[2024-09-01 16:06:07,100][25505] Conv encoder output size: 512 +[2024-09-01 16:06:07,101][25505] Policy head output size: 512 +[2024-09-01 16:06:07,129][25505] Created Actor Critic model with architecture: +[2024-09-01 16:06:07,130][25505] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -1781,5997 +2624,178 @@ git_repo_name=not a git repository (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-09-01 07:12:19,271][16868] Using optimizer -[2024-09-01 07:12:19,274][16868] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth... -[2024-09-01 07:12:19,358][16868] Loading model from checkpoint -[2024-09-01 07:12:19,427][16868] Loaded experiment state at self.train_step=376, self.env_steps=1540096 -[2024-09-01 07:12:19,429][16868] Initialized policy 0 weights for model version 376 -[2024-09-01 07:12:19,441][16868] LearnerWorker_p0 finished initialization! -[2024-09-01 07:12:19,471][16881] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 07:12:19,473][16881] RunningMeanStd input shape: (1,) -[2024-09-01 07:12:19,554][16881] ConvEncoder: input_channels=3 -[2024-09-01 07:12:19,898][16881] Conv encoder output size: 512 -[2024-09-01 07:12:19,899][16881] Policy head output size: 512 -[2024-09-01 07:12:19,946][00307] Inference worker 0-0 is ready! -[2024-09-01 07:12:19,949][00307] All inference workers are ready! Signal rollout workers to start! -[2024-09-01 07:12:19,979][00307] Heartbeat connected on Batcher_0 -[2024-09-01 07:12:19,987][00307] Heartbeat connected on LearnerWorker_p0 -[2024-09-01 07:12:20,030][00307] Heartbeat connected on InferenceWorker_p0-w0 -[2024-09-01 07:12:20,179][16885] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,185][16888] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,190][16883] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,192][16889] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,242][16884] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,249][16887] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,253][16886] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:20,262][16882] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 07:12:21,508][16887] Decorrelating experience for 0 frames... -[2024-09-01 07:12:21,988][16885] Decorrelating experience for 0 frames... -[2024-09-01 07:12:21,987][16888] Decorrelating experience for 0 frames... -[2024-09-01 07:12:22,001][16883] Decorrelating experience for 0 frames... -[2024-09-01 07:12:22,047][16887] Decorrelating experience for 32 frames... -[2024-09-01 07:12:22,154][00307] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 1540096. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 07:12:22,564][16882] Decorrelating experience for 0 frames... -[2024-09-01 07:12:23,058][16882] Decorrelating experience for 32 frames... -[2024-09-01 07:12:23,620][16885] Decorrelating experience for 32 frames... -[2024-09-01 07:12:23,629][16888] Decorrelating experience for 32 frames... -[2024-09-01 07:12:23,643][16883] Decorrelating experience for 32 frames... -[2024-09-01 07:12:23,648][16889] Decorrelating experience for 0 frames... -[2024-09-01 07:12:24,341][16885] Decorrelating experience for 64 frames... -[2024-09-01 07:12:24,848][16888] Decorrelating experience for 64 frames... -[2024-09-01 07:12:25,063][16882] Decorrelating experience for 64 frames... -[2024-09-01 07:12:25,088][16887] Decorrelating experience for 64 frames... -[2024-09-01 07:12:25,147][16886] Decorrelating experience for 0 frames... -[2024-09-01 07:12:25,943][16888] Decorrelating experience for 96 frames... -[2024-09-01 07:12:26,210][16884] Decorrelating experience for 0 frames... -[2024-09-01 07:12:26,210][00307] Heartbeat connected on RolloutWorker_w5 -[2024-09-01 07:12:26,247][16887] Decorrelating experience for 96 frames... -[2024-09-01 07:12:26,354][00307] Heartbeat connected on RolloutWorker_w6 -[2024-09-01 07:12:26,679][16883] Decorrelating experience for 64 frames... -[2024-09-01 07:12:26,724][16885] Decorrelating experience for 96 frames... -[2024-09-01 07:12:27,133][00307] Heartbeat connected on RolloutWorker_w3 -[2024-09-01 07:12:27,154][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1540096. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 07:12:27,657][16882] Decorrelating experience for 96 frames... -[2024-09-01 07:12:27,898][00307] Heartbeat connected on RolloutWorker_w0 -[2024-09-01 07:12:29,217][16889] Decorrelating experience for 32 frames... -[2024-09-01 07:12:29,270][16884] Decorrelating experience for 32 frames... -[2024-09-01 07:12:29,492][16883] Decorrelating experience for 96 frames... -[2024-09-01 07:12:29,893][00307] Heartbeat connected on RolloutWorker_w1 -[2024-09-01 07:12:32,156][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1540096. Throughput: 0: 168.8. Samples: 1688. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 07:12:32,161][00307] Avg episode reward: [(0, '3.557')] -[2024-09-01 07:12:32,607][16889] Decorrelating experience for 64 frames... -[2024-09-01 07:12:34,799][16886] Decorrelating experience for 32 frames... -[2024-09-01 07:12:36,129][16884] Decorrelating experience for 64 frames... -[2024-09-01 07:12:36,813][16868] Signal inference workers to stop experience collection... -[2024-09-01 07:12:36,917][16881] InferenceWorker_p0-w0: stopping experience collection -[2024-09-01 07:12:37,154][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1540096. Throughput: 0: 139.6. Samples: 2094. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 07:12:37,157][00307] Avg episode reward: [(0, '3.538')] -[2024-09-01 07:12:38,436][16868] Signal inference workers to resume experience collection... -[2024-09-01 07:12:38,438][16881] InferenceWorker_p0-w0: resuming experience collection -[2024-09-01 07:12:39,019][16886] Decorrelating experience for 64 frames... -[2024-09-01 07:12:39,346][16889] Decorrelating experience for 96 frames... -[2024-09-01 07:12:40,927][00307] Heartbeat connected on RolloutWorker_w7 -[2024-09-01 07:12:42,154][00307] Fps is (10 sec: 409.7, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 1544192. Throughput: 0: 132.8. Samples: 2656. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-09-01 07:12:42,157][00307] Avg episode reward: [(0, '4.207')] -[2024-09-01 07:12:43,814][16884] Decorrelating experience for 96 frames... -[2024-09-01 07:12:43,895][16886] Decorrelating experience for 96 frames... -[2024-09-01 07:12:44,039][00307] Heartbeat connected on RolloutWorker_w2 -[2024-09-01 07:12:44,095][00307] Heartbeat connected on RolloutWorker_w4 -[2024-09-01 07:12:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 327.7, 300 sec: 327.7). Total num frames: 1548288. Throughput: 0: 166.2. Samples: 4154. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) -[2024-09-01 07:12:47,160][00307] Avg episode reward: [(0, '5.277')] -[2024-09-01 07:12:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 1552384. Throughput: 0: 161.7. Samples: 4852. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0) -[2024-09-01 07:12:52,157][00307] Avg episode reward: [(0, '5.226')] -[2024-09-01 07:12:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 468.1, 300 sec: 468.1). Total num frames: 1556480. Throughput: 0: 178.2. Samples: 6238. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:12:57,166][00307] Avg episode reward: [(0, '6.285')] -[2024-09-01 07:13:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 512.0, 300 sec: 512.0). Total num frames: 1560576. Throughput: 0: 188.1. Samples: 7524. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:13:02,163][00307] Avg episode reward: [(0, '6.369')] -[2024-09-01 07:13:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 637.2, 300 sec: 637.2). Total num frames: 1568768. Throughput: 0: 183.7. Samples: 8268. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:13:07,157][00307] Avg episode reward: [(0, '6.746')] -[2024-09-01 07:13:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 1572864. Throughput: 0: 212.8. Samples: 9578. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:13:12,164][00307] Avg episode reward: [(0, '6.911')] -[2024-09-01 07:13:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 670.3, 300 sec: 670.3). Total num frames: 1576960. Throughput: 0: 198.2. Samples: 10606. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:13:17,156][00307] Avg episode reward: [(0, '7.025')] -[2024-09-01 07:13:20,960][16881] Updated weights for policy 0, policy_version 386 (0.2626) -[2024-09-01 07:13:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 682.7, 300 sec: 682.7). Total num frames: 1581056. Throughput: 0: 212.3. Samples: 11646. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:13:22,157][00307] Avg episode reward: [(0, '7.650')] -[2024-09-01 07:13:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 693.2). Total num frames: 1585152. Throughput: 0: 235.0. Samples: 13230. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:13:27,156][00307] Avg episode reward: [(0, '7.978')] -[2024-09-01 07:13:32,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 702.2). Total num frames: 1589248. Throughput: 0: 226.7. Samples: 14354. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:13:32,162][00307] Avg episode reward: [(0, '8.009')] -[2024-09-01 07:13:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 710.0). Total num frames: 1593344. Throughput: 0: 220.2. Samples: 14760. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:13:37,159][00307] Avg episode reward: [(0, '8.365')] -[2024-09-01 07:13:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 716.8). Total num frames: 1597440. Throughput: 0: 230.2. Samples: 16596. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:13:42,161][00307] Avg episode reward: [(0, '8.443')] -[2024-09-01 07:13:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 722.8). Total num frames: 1601536. Throughput: 0: 228.3. Samples: 17798. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:13:47,157][00307] Avg episode reward: [(0, '8.822')] -[2024-09-01 07:13:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 728.2). Total num frames: 1605632. Throughput: 0: 225.5. Samples: 18414. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:13:52,156][00307] Avg episode reward: [(0, '9.096')] -[2024-09-01 07:13:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 776.1). Total num frames: 1613824. Throughput: 0: 229.2. Samples: 19894. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:13:57,157][00307] Avg episode reward: [(0, '9.477')] -[2024-09-01 07:14:00,776][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000395_1617920.pth... -[2024-09-01 07:14:00,901][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000352_1441792.pth -[2024-09-01 07:14:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 778.2). Total num frames: 1617920. Throughput: 0: 240.2. Samples: 21414. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:14:02,164][00307] Avg episode reward: [(0, '9.971')] -[2024-09-01 07:14:05,804][16868] Saving new best policy, reward=9.971! -[2024-09-01 07:14:05,825][16881] Updated weights for policy 0, policy_version 396 (0.0585) -[2024-09-01 07:14:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 780.2). Total num frames: 1622016. Throughput: 0: 228.1. Samples: 21910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:07,157][00307] Avg episode reward: [(0, '10.246')] -[2024-09-01 07:14:11,580][16868] Saving new best policy, reward=10.246! -[2024-09-01 07:14:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 782.0). Total num frames: 1626112. Throughput: 0: 216.1. Samples: 22954. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:12,157][00307] Avg episode reward: [(0, '10.156')] -[2024-09-01 07:14:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 783.6). Total num frames: 1630208. Throughput: 0: 222.1. Samples: 24350. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:17,156][00307] Avg episode reward: [(0, '10.473')] -[2024-09-01 07:14:19,384][16868] Saving new best policy, reward=10.473! -[2024-09-01 07:14:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 785.1). Total num frames: 1634304. Throughput: 0: 233.0. Samples: 25246. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:22,162][00307] Avg episode reward: [(0, '10.753')] -[2024-09-01 07:14:24,707][16868] Saving new best policy, reward=10.753! -[2024-09-01 07:14:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 786.4). Total num frames: 1638400. Throughput: 0: 215.1. Samples: 26276. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:27,158][00307] Avg episode reward: [(0, '10.806')] -[2024-09-01 07:14:29,913][16868] Saving new best policy, reward=10.806! -[2024-09-01 07:14:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 787.7). Total num frames: 1642496. Throughput: 0: 224.9. Samples: 27920. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:32,162][00307] Avg episode reward: [(0, '10.848')] -[2024-09-01 07:14:33,703][16868] Saving new best policy, reward=10.848! -[2024-09-01 07:14:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 788.9). Total num frames: 1646592. Throughput: 0: 225.8. Samples: 28574. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:14:37,162][00307] Avg episode reward: [(0, '10.894')] -[2024-09-01 07:14:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 789.9). Total num frames: 1650688. Throughput: 0: 222.4. Samples: 29900. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:14:42,157][00307] Avg episode reward: [(0, '10.830')] -[2024-09-01 07:14:43,597][16868] Saving new best policy, reward=10.894! -[2024-09-01 07:14:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 791.0). Total num frames: 1654784. Throughput: 0: 214.5. Samples: 31066. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:47,161][00307] Avg episode reward: [(0, '11.474')] -[2024-09-01 07:14:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 791.9). Total num frames: 1658880. Throughput: 0: 219.9. Samples: 31806. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:14:52,160][00307] Avg episode reward: [(0, '11.662')] -[2024-09-01 07:14:52,281][16868] Saving new best policy, reward=11.474! -[2024-09-01 07:14:52,295][16881] Updated weights for policy 0, policy_version 406 (0.0577) -[2024-09-01 07:14:56,121][16868] Saving new best policy, reward=11.662! -[2024-09-01 07:14:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1667072. Throughput: 0: 226.8. Samples: 33162. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:14:57,161][00307] Avg episode reward: [(0, '11.520')] -[2024-09-01 07:15:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 793.6). Total num frames: 1667072. Throughput: 0: 218.5. Samples: 34182. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:15:02,156][00307] Avg episode reward: [(0, '11.481')] -[2024-09-01 07:15:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1675264. Throughput: 0: 216.3. Samples: 34978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:15:07,157][00307] Avg episode reward: [(0, '11.701')] -[2024-09-01 07:15:10,677][16868] Saving new best policy, reward=11.701! -[2024-09-01 07:15:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1679360. Throughput: 0: 224.2. Samples: 36366. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:12,158][00307] Avg episode reward: [(0, '11.475')] -[2024-09-01 07:15:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1683456. Throughput: 0: 219.0. Samples: 37774. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:17,163][00307] Avg episode reward: [(0, '11.413')] -[2024-09-01 07:15:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1687552. Throughput: 0: 216.0. Samples: 38294. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:22,160][00307] Avg episode reward: [(0, '11.249')] -[2024-09-01 07:15:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1691648. Throughput: 0: 218.0. Samples: 39710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:27,157][00307] Avg episode reward: [(0, '11.142')] -[2024-09-01 07:15:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1695744. Throughput: 0: 228.6. Samples: 41354. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:32,156][00307] Avg episode reward: [(0, '11.170')] -[2024-09-01 07:15:37,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 1699840. Throughput: 0: 222.4. Samples: 41814. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:37,160][00307] Avg episode reward: [(0, '11.221')] -[2024-09-01 07:15:39,187][16881] Updated weights for policy 0, policy_version 416 (0.0563) -[2024-09-01 07:15:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1703936. Throughput: 0: 218.8. Samples: 43006. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:42,158][00307] Avg episode reward: [(0, '11.736')] -[2024-09-01 07:15:47,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 1708032. Throughput: 0: 228.2. Samples: 44452. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:47,157][00307] Avg episode reward: [(0, '11.625')] -[2024-09-01 07:15:47,276][16868] Saving new best policy, reward=11.736! -[2024-09-01 07:15:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 838.7). Total num frames: 1716224. Throughput: 0: 233.9. Samples: 45502. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:52,162][00307] Avg episode reward: [(0, '11.540')] -[2024-09-01 07:15:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 1716224. Throughput: 0: 225.0. Samples: 46490. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:15:57,160][00307] Avg episode reward: [(0, '11.689')] -[2024-09-01 07:16:01,619][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000421_1724416.pth... -[2024-09-01 07:16:01,737][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000376_1540096.pth -[2024-09-01 07:16:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 837.8). Total num frames: 1724416. Throughput: 0: 222.3. Samples: 47776. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:16:02,165][00307] Avg episode reward: [(0, '11.770')] -[2024-09-01 07:16:05,620][16868] Saving new best policy, reward=11.770! -[2024-09-01 07:16:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 837.4). Total num frames: 1728512. Throughput: 0: 231.0. Samples: 48690. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:16:07,157][00307] Avg episode reward: [(0, '11.593')] -[2024-09-01 07:16:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 837.0). Total num frames: 1732608. Throughput: 0: 223.6. Samples: 49770. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:16:12,164][00307] Avg episode reward: [(0, '11.559')] -[2024-09-01 07:16:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 836.6). Total num frames: 1736704. Throughput: 0: 202.2. Samples: 50454. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:16:17,157][00307] Avg episode reward: [(0, '12.050')] -[2024-09-01 07:16:20,392][16868] Saving new best policy, reward=12.050! -[2024-09-01 07:16:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 836.3). Total num frames: 1740800. Throughput: 0: 221.4. Samples: 51778. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:16:22,163][00307] Avg episode reward: [(0, '12.250')] -[2024-09-01 07:16:24,373][16881] Updated weights for policy 0, policy_version 426 (0.0592) -[2024-09-01 07:16:24,380][16868] Saving new best policy, reward=12.250! -[2024-09-01 07:16:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 835.9). Total num frames: 1744896. Throughput: 0: 231.4. Samples: 53420. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:27,157][00307] Avg episode reward: [(0, '12.339')] -[2024-09-01 07:16:27,903][16868] Signal inference workers to stop experience collection... (50 times) -[2024-09-01 07:16:27,978][16881] InferenceWorker_p0-w0: stopping experience collection (50 times) -[2024-09-01 07:16:29,927][16868] Signal inference workers to resume experience collection... (50 times) -[2024-09-01 07:16:29,928][16881] InferenceWorker_p0-w0: resuming experience collection (50 times) -[2024-09-01 07:16:29,934][16868] Saving new best policy, reward=12.339! -[2024-09-01 07:16:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 835.6). Total num frames: 1748992. Throughput: 0: 220.8. Samples: 54386. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:32,161][00307] Avg episode reward: [(0, '12.319')] -[2024-09-01 07:16:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 835.3). Total num frames: 1753088. Throughput: 0: 209.2. Samples: 54918. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:37,157][00307] Avg episode reward: [(0, '12.973')] -[2024-09-01 07:16:38,902][16868] Saving new best policy, reward=12.973! -[2024-09-01 07:16:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 835.0). Total num frames: 1757184. Throughput: 0: 227.4. Samples: 56722. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:42,163][00307] Avg episode reward: [(0, '12.665')] -[2024-09-01 07:16:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 834.7). Total num frames: 1761280. Throughput: 0: 223.2. Samples: 57822. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:47,161][00307] Avg episode reward: [(0, '12.537')] -[2024-09-01 07:16:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 834.4). Total num frames: 1765376. Throughput: 0: 211.5. Samples: 58208. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:52,161][00307] Avg episode reward: [(0, '12.044')] -[2024-09-01 07:16:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 834.1). Total num frames: 1769472. Throughput: 0: 225.8. Samples: 59932. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:16:57,161][00307] Avg episode reward: [(0, '11.735')] -[2024-09-01 07:17:02,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 848.4). Total num frames: 1777664. Throughput: 0: 240.6. Samples: 61284. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:02,160][00307] Avg episode reward: [(0, '11.873')] -[2024-09-01 07:17:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.9). Total num frames: 1781760. Throughput: 0: 226.3. Samples: 61962. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:07,159][00307] Avg episode reward: [(0, '11.994')] -[2024-09-01 07:17:11,735][16881] Updated weights for policy 0, policy_version 436 (0.0568) -[2024-09-01 07:17:12,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 847.4). Total num frames: 1785856. Throughput: 0: 213.2. Samples: 63012. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:17:12,156][00307] Avg episode reward: [(0, '11.500')] -[2024-09-01 07:17:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 1789952. Throughput: 0: 222.8. Samples: 64412. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:17:17,163][00307] Avg episode reward: [(0, '11.542')] -[2024-09-01 07:17:22,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 1794048. Throughput: 0: 228.9. Samples: 65220. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:22,160][00307] Avg episode reward: [(0, '11.690')] -[2024-09-01 07:17:27,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 1798144. Throughput: 0: 211.0. Samples: 66216. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:27,164][00307] Avg episode reward: [(0, '11.860')] -[2024-09-01 07:17:32,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1802240. Throughput: 0: 226.9. Samples: 68032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:32,156][00307] Avg episode reward: [(0, '11.428')] -[2024-09-01 07:17:37,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1806336. Throughput: 0: 232.9. Samples: 68690. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:37,162][00307] Avg episode reward: [(0, '11.191')] -[2024-09-01 07:17:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1810432. Throughput: 0: 220.4. Samples: 69852. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:42,160][00307] Avg episode reward: [(0, '11.305')] -[2024-09-01 07:17:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1814528. Throughput: 0: 218.6. Samples: 71120. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:47,156][00307] Avg episode reward: [(0, '11.222')] -[2024-09-01 07:17:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1822720. Throughput: 0: 221.3. Samples: 71922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:17:52,158][00307] Avg episode reward: [(0, '11.470')] -[2024-09-01 07:17:55,722][16881] Updated weights for policy 0, policy_version 446 (0.1141) -[2024-09-01 07:17:57,162][00307] Fps is (10 sec: 1227.8, 60 sec: 955.6, 300 sec: 902.5). Total num frames: 1826816. Throughput: 0: 228.3. Samples: 73286. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:17:57,165][00307] Avg episode reward: [(0, '11.025')] -[2024-09-01 07:18:01,590][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000447_1830912.pth... -[2024-09-01 07:18:01,746][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000395_1617920.pth -[2024-09-01 07:18:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1830912. Throughput: 0: 220.0. Samples: 74314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:18:02,157][00307] Avg episode reward: [(0, '11.515')] -[2024-09-01 07:18:07,154][00307] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1835008. Throughput: 0: 220.3. Samples: 75134. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:18:07,160][00307] Avg episode reward: [(0, '11.579')] -[2024-09-01 07:18:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1839104. Throughput: 0: 222.6. Samples: 76234. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:18:12,156][00307] Avg episode reward: [(0, '11.652')] -[2024-09-01 07:18:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1843200. Throughput: 0: 215.6. Samples: 77734. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:18:17,157][00307] Avg episode reward: [(0, '12.488')] -[2024-09-01 07:18:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1847296. Throughput: 0: 210.8. Samples: 78176. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:18:22,157][00307] Avg episode reward: [(0, '12.620')] -[2024-09-01 07:18:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1851392. Throughput: 0: 218.3. Samples: 79676. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:18:27,160][00307] Avg episode reward: [(0, '12.877')] -[2024-09-01 07:18:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1855488. Throughput: 0: 225.9. Samples: 81286. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:18:32,157][00307] Avg episode reward: [(0, '12.735')] -[2024-09-01 07:18:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1859584. Throughput: 0: 219.8. Samples: 81814. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:18:37,161][00307] Avg episode reward: [(0, '12.339')] -[2024-09-01 07:18:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1863680. Throughput: 0: 214.4. Samples: 82932. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:18:42,159][00307] Avg episode reward: [(0, '12.163')] -[2024-09-01 07:18:43,705][16881] Updated weights for policy 0, policy_version 456 (0.1046) -[2024-09-01 07:18:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1871872. Throughput: 0: 222.2. Samples: 84312. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:18:47,158][00307] Avg episode reward: [(0, '12.241')] -[2024-09-01 07:18:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1875968. Throughput: 0: 227.1. Samples: 85354. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:18:52,161][00307] Avg episode reward: [(0, '12.499')] -[2024-09-01 07:18:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 1875968. Throughput: 0: 226.0. Samples: 86404. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:18:57,160][00307] Avg episode reward: [(0, '12.786')] -[2024-09-01 07:19:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1884160. Throughput: 0: 215.9. Samples: 87448. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:02,157][00307] Avg episode reward: [(0, '12.824')] -[2024-09-01 07:19:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1888256. Throughput: 0: 228.0. Samples: 88438. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:07,156][00307] Avg episode reward: [(0, '12.670')] -[2024-09-01 07:19:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1892352. Throughput: 0: 220.3. Samples: 89588. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:12,157][00307] Avg episode reward: [(0, '12.350')] -[2024-09-01 07:19:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1896448. Throughput: 0: 212.7. Samples: 90858. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:17,159][00307] Avg episode reward: [(0, '12.118')] -[2024-09-01 07:19:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1900544. Throughput: 0: 216.0. Samples: 91532. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:19:22,157][00307] Avg episode reward: [(0, '12.525')] -[2024-09-01 07:19:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1904640. Throughput: 0: 229.8. Samples: 93272. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:19:27,156][00307] Avg episode reward: [(0, '12.339')] -[2024-09-01 07:19:28,295][16881] Updated weights for policy 0, policy_version 466 (0.2831) -[2024-09-01 07:19:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1908736. Throughput: 0: 225.7. Samples: 94470. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:19:32,161][00307] Avg episode reward: [(0, '12.041')] -[2024-09-01 07:19:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1912832. Throughput: 0: 212.1. Samples: 94900. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:19:37,159][00307] Avg episode reward: [(0, '12.049')] -[2024-09-01 07:19:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1916928. Throughput: 0: 225.7. Samples: 96560. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:42,166][00307] Avg episode reward: [(0, '12.290')] -[2024-09-01 07:19:47,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 1925120. Throughput: 0: 228.0. Samples: 97710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:47,163][00307] Avg episode reward: [(0, '12.286')] -[2024-09-01 07:19:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1925120. Throughput: 0: 221.4. Samples: 98402. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:52,161][00307] Avg episode reward: [(0, '12.433')] -[2024-09-01 07:19:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1933312. Throughput: 0: 227.0. Samples: 99804. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:19:57,157][00307] Avg episode reward: [(0, '12.710')] -[2024-09-01 07:20:00,303][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000473_1937408.pth... -[2024-09-01 07:20:00,413][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000421_1724416.pth -[2024-09-01 07:20:02,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1937408. Throughput: 0: 236.9. Samples: 101518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:02,165][00307] Avg episode reward: [(0, '13.144')] -[2024-09-01 07:20:05,431][16868] Saving new best policy, reward=13.144! -[2024-09-01 07:20:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1941504. Throughput: 0: 228.5. Samples: 101816. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:20:07,162][00307] Avg episode reward: [(0, '13.144')] -[2024-09-01 07:20:12,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1945600. Throughput: 0: 213.6. Samples: 102886. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:20:12,162][00307] Avg episode reward: [(0, '13.167')] -[2024-09-01 07:20:15,044][16868] Saving new best policy, reward=13.167! -[2024-09-01 07:20:15,056][16881] Updated weights for policy 0, policy_version 476 (0.0570) -[2024-09-01 07:20:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1949696. Throughput: 0: 227.7. Samples: 104718. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:17,156][00307] Avg episode reward: [(0, '13.055')] -[2024-09-01 07:20:17,397][16868] Signal inference workers to stop experience collection... (100 times) -[2024-09-01 07:20:17,458][16881] InferenceWorker_p0-w0: stopping experience collection (100 times) -[2024-09-01 07:20:18,983][16868] Signal inference workers to resume experience collection... (100 times) -[2024-09-01 07:20:18,984][16881] InferenceWorker_p0-w0: resuming experience collection (100 times) -[2024-09-01 07:20:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1953792. Throughput: 0: 227.3. Samples: 105128. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:22,156][00307] Avg episode reward: [(0, '13.412')] -[2024-09-01 07:20:24,507][16868] Saving new best policy, reward=13.412! -[2024-09-01 07:20:27,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 1957888. Throughput: 0: 215.8. Samples: 106272. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:20:27,162][00307] Avg episode reward: [(0, '13.470')] -[2024-09-01 07:20:29,543][16868] Saving new best policy, reward=13.470! -[2024-09-01 07:20:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1961984. Throughput: 0: 226.7. Samples: 107912. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:20:32,164][00307] Avg episode reward: [(0, '13.533')] -[2024-09-01 07:20:37,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1966080. Throughput: 0: 226.6. Samples: 108600. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:37,161][00307] Avg episode reward: [(0, '13.701')] -[2024-09-01 07:20:37,176][16868] Saving new best policy, reward=13.533! -[2024-09-01 07:20:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1970176. Throughput: 0: 224.8. Samples: 109922. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:42,157][00307] Avg episode reward: [(0, '14.059')] -[2024-09-01 07:20:43,191][16868] Saving new best policy, reward=13.701! -[2024-09-01 07:20:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 1974272. Throughput: 0: 211.1. Samples: 111018. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:47,164][00307] Avg episode reward: [(0, '14.027')] -[2024-09-01 07:20:48,059][16868] Saving new best policy, reward=14.059! -[2024-09-01 07:20:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 1982464. Throughput: 0: 222.4. Samples: 111824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:52,157][00307] Avg episode reward: [(0, '13.893')] -[2024-09-01 07:20:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1986560. Throughput: 0: 228.2. Samples: 113154. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:20:57,163][00307] Avg episode reward: [(0, '14.243')] -[2024-09-01 07:21:01,511][16868] Saving new best policy, reward=14.243! -[2024-09-01 07:21:01,499][16881] Updated weights for policy 0, policy_version 486 (0.0547) -[2024-09-01 07:21:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1990656. Throughput: 0: 209.6. Samples: 114152. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:02,159][00307] Avg episode reward: [(0, '14.405')] -[2024-09-01 07:21:06,243][16868] Saving new best policy, reward=14.405! -[2024-09-01 07:21:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1994752. Throughput: 0: 220.0. Samples: 115028. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:07,165][00307] Avg episode reward: [(0, '14.750')] -[2024-09-01 07:21:10,177][16868] Saving new best policy, reward=14.750! -[2024-09-01 07:21:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 1998848. Throughput: 0: 226.1. Samples: 116446. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:12,159][00307] Avg episode reward: [(0, '14.057')] -[2024-09-01 07:21:17,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2002944. Throughput: 0: 218.7. Samples: 117754. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:17,165][00307] Avg episode reward: [(0, '14.028')] -[2024-09-01 07:21:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2007040. Throughput: 0: 213.2. Samples: 118192. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:22,161][00307] Avg episode reward: [(0, '14.031')] -[2024-09-01 07:21:27,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2011136. Throughput: 0: 218.2. Samples: 119740. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:27,157][00307] Avg episode reward: [(0, '14.140')] -[2024-09-01 07:21:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2015232. Throughput: 0: 228.3. Samples: 121292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:32,157][00307] Avg episode reward: [(0, '13.883')] -[2024-09-01 07:21:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2019328. Throughput: 0: 222.2. Samples: 121822. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:37,157][00307] Avg episode reward: [(0, '13.936')] -[2024-09-01 07:21:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2023424. Throughput: 0: 217.2. Samples: 122926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:42,163][00307] Avg episode reward: [(0, '14.501')] -[2024-09-01 07:21:46,781][16881] Updated weights for policy 0, policy_version 496 (0.0730) -[2024-09-01 07:21:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2031616. Throughput: 0: 227.6. Samples: 124392. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:47,160][00307] Avg episode reward: [(0, '14.449')] -[2024-09-01 07:21:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2035712. Throughput: 0: 230.1. Samples: 125384. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:52,157][00307] Avg episode reward: [(0, '14.345')] -[2024-09-01 07:21:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2035712. Throughput: 0: 221.2. Samples: 126402. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:21:57,158][00307] Avg episode reward: [(0, '14.394')] -[2024-09-01 07:22:01,004][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000499_2043904.pth... -[2024-09-01 07:22:01,119][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000447_1830912.pth -[2024-09-01 07:22:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2043904. Throughput: 0: 224.4. Samples: 127852. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:02,163][00307] Avg episode reward: [(0, '14.159')] -[2024-09-01 07:22:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2048000. Throughput: 0: 227.9. Samples: 128448. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:07,162][00307] Avg episode reward: [(0, '13.779')] -[2024-09-01 07:22:12,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2052096. Throughput: 0: 223.6. Samples: 129804. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:12,163][00307] Avg episode reward: [(0, '14.327')] -[2024-09-01 07:22:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2056192. Throughput: 0: 214.2. Samples: 130932. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:17,162][00307] Avg episode reward: [(0, '13.851')] -[2024-09-01 07:22:22,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2060288. Throughput: 0: 217.3. Samples: 131602. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:22,157][00307] Avg episode reward: [(0, '13.949')] -[2024-09-01 07:22:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2064384. Throughput: 0: 233.6. Samples: 133438. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:22:27,156][00307] Avg episode reward: [(0, '13.507')] -[2024-09-01 07:22:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2068480. Throughput: 0: 225.9. Samples: 134558. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:22:32,158][00307] Avg episode reward: [(0, '13.435')] -[2024-09-01 07:22:33,559][16881] Updated weights for policy 0, policy_version 506 (0.2045) -[2024-09-01 07:22:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2072576. Throughput: 0: 217.2. Samples: 135160. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:22:37,160][00307] Avg episode reward: [(0, '14.036')] -[2024-09-01 07:22:42,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2080768. Throughput: 0: 226.4. Samples: 136590. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:42,157][00307] Avg episode reward: [(0, '14.004')] -[2024-09-01 07:22:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2084864. Throughput: 0: 221.0. Samples: 137796. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:47,163][00307] Avg episode reward: [(0, '14.319')] -[2024-09-01 07:22:52,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2088960. Throughput: 0: 222.8. Samples: 138472. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:52,163][00307] Avg episode reward: [(0, '14.368')] -[2024-09-01 07:22:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2093056. Throughput: 0: 223.3. Samples: 139852. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:22:57,159][00307] Avg episode reward: [(0, '14.741')] -[2024-09-01 07:23:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2097152. Throughput: 0: 237.7. Samples: 141628. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:23:02,158][00307] Avg episode reward: [(0, '14.801')] -[2024-09-01 07:23:04,763][16868] Saving new best policy, reward=14.801! -[2024-09-01 07:23:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2101248. Throughput: 0: 228.2. Samples: 141870. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:23:07,159][00307] Avg episode reward: [(0, '14.545')] -[2024-09-01 07:23:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 2105344. Throughput: 0: 215.7. Samples: 143144. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:23:12,162][00307] Avg episode reward: [(0, '14.294')] -[2024-09-01 07:23:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2109440. Throughput: 0: 228.4. Samples: 144838. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:23:17,159][00307] Avg episode reward: [(0, '15.037')] -[2024-09-01 07:23:18,504][16881] Updated weights for policy 0, policy_version 516 (0.0549) -[2024-09-01 07:23:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2113536. Throughput: 0: 223.6. Samples: 145224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:23:22,157][00307] Avg episode reward: [(0, '14.951')] -[2024-09-01 07:23:23,602][16868] Saving new best policy, reward=15.037! -[2024-09-01 07:23:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2117632. Throughput: 0: 217.3. Samples: 146370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:23:27,157][00307] Avg episode reward: [(0, '14.917')] -[2024-09-01 07:23:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2121728. Throughput: 0: 224.7. Samples: 147908. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:23:32,162][00307] Avg episode reward: [(0, '15.147')] -[2024-09-01 07:23:36,225][16868] Saving new best policy, reward=15.147! -[2024-09-01 07:23:37,159][00307] Fps is (10 sec: 1228.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2129920. Throughput: 0: 232.0. Samples: 148912. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:23:37,170][00307] Avg episode reward: [(0, '15.686')] -[2024-09-01 07:23:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2129920. Throughput: 0: 224.7. Samples: 149964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:23:42,159][00307] Avg episode reward: [(0, '16.184')] -[2024-09-01 07:23:42,445][16868] Saving new best policy, reward=15.686! -[2024-09-01 07:23:47,107][16868] Saving new best policy, reward=16.184! -[2024-09-01 07:23:47,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2138112. Throughput: 0: 207.2. Samples: 150950. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:23:47,162][00307] Avg episode reward: [(0, '16.801')] -[2024-09-01 07:23:50,972][16868] Saving new best policy, reward=16.801! -[2024-09-01 07:23:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2142208. Throughput: 0: 224.5. Samples: 151972. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:23:52,165][00307] Avg episode reward: [(0, '16.521')] -[2024-09-01 07:23:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2146304. Throughput: 0: 221.6. Samples: 153116. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:23:57,162][00307] Avg episode reward: [(0, '16.792')] -[2024-09-01 07:24:00,521][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000525_2150400.pth... -[2024-09-01 07:24:00,663][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000473_1937408.pth -[2024-09-01 07:24:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2150400. Throughput: 0: 210.7. Samples: 154320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:24:02,162][00307] Avg episode reward: [(0, '17.239')] -[2024-09-01 07:24:05,346][16868] Saving new best policy, reward=17.239! -[2024-09-01 07:24:05,360][16881] Updated weights for policy 0, policy_version 526 (0.0708) -[2024-09-01 07:24:07,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2154496. Throughput: 0: 218.1. Samples: 155040. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:24:07,163][00307] Avg episode reward: [(0, '17.689')] -[2024-09-01 07:24:07,681][16868] Signal inference workers to stop experience collection... (150 times) -[2024-09-01 07:24:07,741][16881] InferenceWorker_p0-w0: stopping experience collection (150 times) -[2024-09-01 07:24:09,206][16868] Signal inference workers to resume experience collection... (150 times) -[2024-09-01 07:24:09,207][16868] Saving new best policy, reward=17.689! -[2024-09-01 07:24:09,209][16881] InferenceWorker_p0-w0: resuming experience collection (150 times) -[2024-09-01 07:24:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2158592. Throughput: 0: 230.4. Samples: 156736. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:24:12,165][00307] Avg episode reward: [(0, '17.980')] -[2024-09-01 07:24:17,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2162688. Throughput: 0: 222.4. Samples: 157914. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:24:17,160][00307] Avg episode reward: [(0, '18.341')] -[2024-09-01 07:24:19,522][16868] Saving new best policy, reward=17.980! -[2024-09-01 07:24:19,676][16868] Saving new best policy, reward=18.341! -[2024-09-01 07:24:22,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2166784. Throughput: 0: 207.3. Samples: 158240. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:24:22,162][00307] Avg episode reward: [(0, '18.190')] -[2024-09-01 07:24:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2170880. Throughput: 0: 219.6. Samples: 159848. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:24:27,161][00307] Avg episode reward: [(0, '18.007')] -[2024-09-01 07:24:32,154][00307] Fps is (10 sec: 1229.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 2179072. Throughput: 0: 212.7. Samples: 160520. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:24:32,159][00307] Avg episode reward: [(0, '17.799')] -[2024-09-01 07:24:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 888.6). Total num frames: 2179072. Throughput: 0: 219.8. Samples: 161864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:24:37,156][00307] Avg episode reward: [(0, '17.315')] -[2024-09-01 07:24:42,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2183168. Throughput: 0: 227.6. Samples: 163358. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:24:42,163][00307] Avg episode reward: [(0, '17.347')] -[2024-09-01 07:24:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 2191360. Throughput: 0: 231.2. Samples: 164724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:24:47,157][00307] Avg episode reward: [(0, '17.143')] -[2024-09-01 07:24:52,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2191360. Throughput: 0: 229.5. Samples: 165366. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:24:52,177][00307] Avg episode reward: [(0, '16.770')] -[2024-09-01 07:24:53,355][16881] Updated weights for policy 0, policy_version 536 (0.2204) -[2024-09-01 07:24:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2195456. Throughput: 0: 198.8. Samples: 165684. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:24:57,161][00307] Avg episode reward: [(0, '16.281')] -[2024-09-01 07:25:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 2195456. Throughput: 0: 188.4. Samples: 166392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:25:02,162][00307] Avg episode reward: [(0, '16.362')] -[2024-09-01 07:25:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 2203648. Throughput: 0: 204.5. Samples: 167442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:25:07,161][00307] Avg episode reward: [(0, '16.726')] -[2024-09-01 07:25:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2207744. Throughput: 0: 197.6. Samples: 168740. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:25:12,161][00307] Avg episode reward: [(0, '16.163')] -[2024-09-01 07:25:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2211840. Throughput: 0: 202.8. Samples: 169646. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:25:17,158][00307] Avg episode reward: [(0, '16.760')] -[2024-09-01 07:25:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 874.8). Total num frames: 2215936. Throughput: 0: 190.6. Samples: 170440. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:25:22,158][00307] Avg episode reward: [(0, '16.118')] -[2024-09-01 07:25:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2220032. Throughput: 0: 190.8. Samples: 171942. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:25:27,157][00307] Avg episode reward: [(0, '16.257')] -[2024-09-01 07:25:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 874.7). Total num frames: 2224128. Throughput: 0: 191.7. Samples: 173352. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:25:32,161][00307] Avg episode reward: [(0, '16.258')] -[2024-09-01 07:25:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2228224. Throughput: 0: 184.1. Samples: 173652. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:25:37,156][00307] Avg episode reward: [(0, '16.166')] -[2024-09-01 07:25:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2232320. Throughput: 0: 208.2. Samples: 175054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:25:42,156][00307] Avg episode reward: [(0, '16.013')] -[2024-09-01 07:25:43,812][16881] Updated weights for policy 0, policy_version 546 (0.1581) -[2024-09-01 07:25:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 2236416. Throughput: 0: 227.6. Samples: 176634. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:25:47,161][00307] Avg episode reward: [(0, '15.559')] -[2024-09-01 07:25:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2240512. Throughput: 0: 216.1. Samples: 177166. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:25:52,161][00307] Avg episode reward: [(0, '15.272')] -[2024-09-01 07:25:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2244608. Throughput: 0: 215.8. Samples: 178452. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:25:57,162][00307] Avg episode reward: [(0, '15.244')] -[2024-09-01 07:26:01,818][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000550_2252800.pth... -[2024-09-01 07:26:01,942][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000499_2043904.pth -[2024-09-01 07:26:02,156][00307] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 2252800. Throughput: 0: 225.6. Samples: 179800. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:26:02,173][00307] Avg episode reward: [(0, '15.466')] -[2024-09-01 07:26:07,159][00307] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2256896. Throughput: 0: 229.8. Samples: 180780. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:26:07,162][00307] Avg episode reward: [(0, '15.895')] -[2024-09-01 07:26:12,154][00307] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2256896. Throughput: 0: 220.0. Samples: 181842. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:26:12,157][00307] Avg episode reward: [(0, '15.878')] -[2024-09-01 07:26:17,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2265088. Throughput: 0: 215.1. Samples: 183032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:26:17,161][00307] Avg episode reward: [(0, '16.230')] -[2024-09-01 07:26:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2269184. Throughput: 0: 227.6. Samples: 183896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:26:22,162][00307] Avg episode reward: [(0, '16.910')] -[2024-09-01 07:26:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2273280. Throughput: 0: 224.0. Samples: 185134. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:27,165][00307] Avg episode reward: [(0, '16.960')] -[2024-09-01 07:26:30,960][16881] Updated weights for policy 0, policy_version 556 (0.0564) -[2024-09-01 07:26:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2277376. Throughput: 0: 217.4. Samples: 186416. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:32,156][00307] Avg episode reward: [(0, '16.661')] -[2024-09-01 07:26:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2281472. Throughput: 0: 219.8. Samples: 187058. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:37,156][00307] Avg episode reward: [(0, '16.882')] -[2024-09-01 07:26:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2285568. Throughput: 0: 230.4. Samples: 188820. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:42,157][00307] Avg episode reward: [(0, '16.906')] -[2024-09-01 07:26:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2289664. Throughput: 0: 220.1. Samples: 189702. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:47,156][00307] Avg episode reward: [(0, '17.188')] -[2024-09-01 07:26:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2293760. Throughput: 0: 209.0. Samples: 190182. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:52,156][00307] Avg episode reward: [(0, '17.278')] -[2024-09-01 07:26:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2297856. Throughput: 0: 227.6. Samples: 192084. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:26:57,162][00307] Avg episode reward: [(0, '17.541')] -[2024-09-01 07:27:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2301952. Throughput: 0: 224.8. Samples: 193150. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:02,161][00307] Avg episode reward: [(0, '17.829')] -[2024-09-01 07:27:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 860.9). Total num frames: 2306048. Throughput: 0: 215.3. Samples: 193586. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:07,162][00307] Avg episode reward: [(0, '17.792')] -[2024-09-01 07:27:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 2314240. Throughput: 0: 222.1. Samples: 195128. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:12,160][00307] Avg episode reward: [(0, '17.868')] -[2024-09-01 07:27:15,697][16881] Updated weights for policy 0, policy_version 566 (0.0043) -[2024-09-01 07:27:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2318336. Throughput: 0: 225.2. Samples: 196552. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:17,159][00307] Avg episode reward: [(0, '17.583')] -[2024-09-01 07:27:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2322432. Throughput: 0: 226.2. Samples: 197236. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:27:22,161][00307] Avg episode reward: [(0, '17.534')] -[2024-09-01 07:27:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2326528. Throughput: 0: 209.8. Samples: 198262. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:27:27,157][00307] Avg episode reward: [(0, '17.541')] -[2024-09-01 07:27:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2330624. Throughput: 0: 224.4. Samples: 199802. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:27:32,160][00307] Avg episode reward: [(0, '17.833')] -[2024-09-01 07:27:37,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 860.8). Total num frames: 2334720. Throughput: 0: 228.3. Samples: 200458. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:27:37,166][00307] Avg episode reward: [(0, '17.973')] -[2024-09-01 07:27:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2338816. Throughput: 0: 208.4. Samples: 201464. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:42,157][00307] Avg episode reward: [(0, '19.016')] -[2024-09-01 07:27:44,803][16868] Saving new best policy, reward=19.016! -[2024-09-01 07:27:47,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2342912. Throughput: 0: 223.6. Samples: 203212. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:47,159][00307] Avg episode reward: [(0, '19.130')] -[2024-09-01 07:27:48,804][16868] Saving new best policy, reward=19.130! -[2024-09-01 07:27:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2347008. Throughput: 0: 227.7. Samples: 203834. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:52,157][00307] Avg episode reward: [(0, '19.020')] -[2024-09-01 07:27:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2351104. Throughput: 0: 220.9. Samples: 205068. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:27:57,159][00307] Avg episode reward: [(0, '19.017')] -[2024-09-01 07:27:59,197][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000575_2355200.pth... -[2024-09-01 07:27:59,343][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000525_2150400.pth -[2024-09-01 07:28:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2355200. Throughput: 0: 218.8. Samples: 206398. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:28:02,166][00307] Avg episode reward: [(0, '19.255')] -[2024-09-01 07:28:03,376][16881] Updated weights for policy 0, policy_version 576 (0.1030) -[2024-09-01 07:28:05,701][16868] Signal inference workers to stop experience collection... (200 times) -[2024-09-01 07:28:05,748][16881] InferenceWorker_p0-w0: stopping experience collection (200 times) -[2024-09-01 07:28:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2359296. Throughput: 0: 218.9. Samples: 207086. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:28:07,169][00307] Avg episode reward: [(0, '18.996')] -[2024-09-01 07:28:07,227][16868] Signal inference workers to resume experience collection... (200 times) -[2024-09-01 07:28:07,228][16881] InferenceWorker_p0-w0: resuming experience collection (200 times) -[2024-09-01 07:28:07,238][16868] Saving new best policy, reward=19.255! -[2024-09-01 07:28:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2367488. Throughput: 0: 225.7. Samples: 208418. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:28:12,159][00307] Avg episode reward: [(0, '19.241')] -[2024-09-01 07:28:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2367488. Throughput: 0: 214.3. Samples: 209444. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:28:17,157][00307] Avg episode reward: [(0, '19.185')] -[2024-09-01 07:28:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2375680. Throughput: 0: 219.8. Samples: 210346. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:28:22,163][00307] Avg episode reward: [(0, '19.145')] -[2024-09-01 07:28:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2379776. Throughput: 0: 226.6. Samples: 211660. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:28:27,159][00307] Avg episode reward: [(0, '19.171')] -[2024-09-01 07:28:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2383872. Throughput: 0: 214.5. Samples: 212864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:28:32,157][00307] Avg episode reward: [(0, '19.330')] -[2024-09-01 07:28:36,098][16868] Saving new best policy, reward=19.330! -[2024-09-01 07:28:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2387968. Throughput: 0: 215.8. Samples: 213544. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:28:37,165][00307] Avg episode reward: [(0, '19.177')] -[2024-09-01 07:28:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2392064. Throughput: 0: 214.4. Samples: 214718. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:28:42,159][00307] Avg episode reward: [(0, '19.370')] -[2024-09-01 07:28:44,047][16868] Saving new best policy, reward=19.370! -[2024-09-01 07:28:47,157][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 860.8). Total num frames: 2396160. Throughput: 0: 224.0. Samples: 216478. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:28:47,161][00307] Avg episode reward: [(0, '19.516')] -[2024-09-01 07:28:49,642][16868] Saving new best policy, reward=19.516! -[2024-09-01 07:28:49,648][16881] Updated weights for policy 0, policy_version 586 (0.1076) -[2024-09-01 07:28:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2400256. Throughput: 0: 213.2. Samples: 216678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:28:52,156][00307] Avg episode reward: [(0, '19.399')] -[2024-09-01 07:28:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2404352. Throughput: 0: 216.6. Samples: 218166. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:28:57,157][00307] Avg episode reward: [(0, '19.851')] -[2024-09-01 07:29:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2408448. Throughput: 0: 226.6. Samples: 219642. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:29:02,161][00307] Avg episode reward: [(0, '19.746')] -[2024-09-01 07:29:02,422][16868] Saving new best policy, reward=19.851! -[2024-09-01 07:29:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2412544. Throughput: 0: 222.5. Samples: 220360. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:29:07,157][00307] Avg episode reward: [(0, '20.418')] -[2024-09-01 07:29:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2416640. Throughput: 0: 215.5. Samples: 221356. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:29:12,161][00307] Avg episode reward: [(0, '21.513')] -[2024-09-01 07:29:13,100][16868] Saving new best policy, reward=20.418! -[2024-09-01 07:29:17,122][16868] Saving new best policy, reward=21.513! -[2024-09-01 07:29:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.8). Total num frames: 2424832. Throughput: 0: 220.0. Samples: 222762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:29:17,164][00307] Avg episode reward: [(0, '21.076')] -[2024-09-01 07:29:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2428928. Throughput: 0: 227.0. Samples: 223758. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:29:22,157][00307] Avg episode reward: [(0, '20.710')] -[2024-09-01 07:29:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2433024. Throughput: 0: 223.5. Samples: 224776. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:29:27,165][00307] Avg episode reward: [(0, '20.247')] -[2024-09-01 07:29:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2437120. Throughput: 0: 211.0. Samples: 225972. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:29:32,158][00307] Avg episode reward: [(0, '19.833')] -[2024-09-01 07:29:35,456][16881] Updated weights for policy 0, policy_version 596 (0.0053) -[2024-09-01 07:29:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2441216. Throughput: 0: 225.7. Samples: 226836. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:29:37,167][00307] Avg episode reward: [(0, '19.673')] -[2024-09-01 07:29:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2445312. Throughput: 0: 223.2. Samples: 228208. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:29:42,163][00307] Avg episode reward: [(0, '19.269')] -[2024-09-01 07:29:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2449408. Throughput: 0: 212.9. Samples: 229224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:29:47,157][00307] Avg episode reward: [(0, '19.364')] -[2024-09-01 07:29:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2453504. Throughput: 0: 211.9. Samples: 229896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:29:52,157][00307] Avg episode reward: [(0, '19.492')] -[2024-09-01 07:29:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2457600. Throughput: 0: 232.5. Samples: 231820. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:29:57,158][00307] Avg episode reward: [(0, '19.187')] -[2024-09-01 07:30:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2461696. Throughput: 0: 224.4. Samples: 232862. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:02,158][00307] Avg episode reward: [(0, '19.358')] -[2024-09-01 07:30:04,136][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000602_2465792.pth... -[2024-09-01 07:30:04,288][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000550_2252800.pth -[2024-09-01 07:30:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2465792. Throughput: 0: 210.4. Samples: 233224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:07,162][00307] Avg episode reward: [(0, '19.255')] -[2024-09-01 07:30:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2473984. Throughput: 0: 225.6. Samples: 234926. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:12,157][00307] Avg episode reward: [(0, '19.292')] -[2024-09-01 07:30:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2478080. Throughput: 0: 225.4. Samples: 236114. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:17,157][00307] Avg episode reward: [(0, '19.357')] -[2024-09-01 07:30:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2478080. Throughput: 0: 221.2. Samples: 236788. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:22,157][00307] Avg episode reward: [(0, '19.373')] -[2024-09-01 07:30:22,687][16881] Updated weights for policy 0, policy_version 606 (0.0548) -[2024-09-01 07:30:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2486272. Throughput: 0: 219.3. Samples: 238076. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:27,159][00307] Avg episode reward: [(0, '19.528')] -[2024-09-01 07:30:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2490368. Throughput: 0: 235.9. Samples: 239840. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:32,161][00307] Avg episode reward: [(0, '19.120')] -[2024-09-01 07:30:37,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2494464. Throughput: 0: 227.3. Samples: 240124. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:37,158][00307] Avg episode reward: [(0, '18.803')] -[2024-09-01 07:30:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2498560. Throughput: 0: 208.5. Samples: 241202. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:30:42,161][00307] Avg episode reward: [(0, '18.756')] -[2024-09-01 07:30:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2502656. Throughput: 0: 227.5. Samples: 243100. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:30:47,157][00307] Avg episode reward: [(0, '17.475')] -[2024-09-01 07:30:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2506752. Throughput: 0: 232.4. Samples: 243684. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:30:52,157][00307] Avg episode reward: [(0, '17.464')] -[2024-09-01 07:30:57,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2510848. Throughput: 0: 218.7. Samples: 244768. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:30:57,159][00307] Avg episode reward: [(0, '17.626')] -[2024-09-01 07:31:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 2514944. Throughput: 0: 227.9. Samples: 246370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:31:02,159][00307] Avg episode reward: [(0, '17.871')] -[2024-09-01 07:31:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2519040. Throughput: 0: 226.8. Samples: 246994. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:31:07,157][00307] Avg episode reward: [(0, '18.518')] -[2024-09-01 07:31:07,355][16881] Updated weights for policy 0, policy_version 616 (0.0546) -[2024-09-01 07:31:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2523136. Throughput: 0: 230.6. Samples: 248454. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:31:12,159][00307] Avg episode reward: [(0, '18.818')] -[2024-09-01 07:31:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2527232. Throughput: 0: 213.6. Samples: 249450. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:31:17,162][00307] Avg episode reward: [(0, '18.656')] -[2024-09-01 07:31:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2535424. Throughput: 0: 219.6. Samples: 250006. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:31:22,163][00307] Avg episode reward: [(0, '19.136')] -[2024-09-01 07:31:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2539520. Throughput: 0: 228.5. Samples: 251484. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:31:27,156][00307] Avg episode reward: [(0, '18.952')] -[2024-09-01 07:31:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2543616. Throughput: 0: 210.8. Samples: 252588. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:31:32,162][00307] Avg episode reward: [(0, '19.066')] -[2024-09-01 07:31:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2547712. Throughput: 0: 215.4. Samples: 253378. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:31:37,158][00307] Avg episode reward: [(0, '18.926')] -[2024-09-01 07:31:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2551808. Throughput: 0: 218.8. Samples: 254612. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:31:42,162][00307] Avg episode reward: [(0, '18.963')] -[2024-09-01 07:31:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2555904. Throughput: 0: 218.9. Samples: 256222. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:31:47,156][00307] Avg episode reward: [(0, '19.329')] -[2024-09-01 07:31:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2560000. Throughput: 0: 214.1. Samples: 256628. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:31:52,157][00307] Avg episode reward: [(0, '19.597')] -[2024-09-01 07:31:55,647][16881] Updated weights for policy 0, policy_version 626 (0.2058) -[2024-09-01 07:31:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2564096. Throughput: 0: 208.3. Samples: 257826. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:31:57,157][00307] Avg episode reward: [(0, '19.913')] -[2024-09-01 07:31:58,054][16868] Signal inference workers to stop experience collection... (250 times) -[2024-09-01 07:31:58,094][16881] InferenceWorker_p0-w0: stopping experience collection (250 times) -[2024-09-01 07:31:59,070][16868] Signal inference workers to resume experience collection... (250 times) -[2024-09-01 07:31:59,071][16881] InferenceWorker_p0-w0: resuming experience collection (250 times) -[2024-09-01 07:31:59,084][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000627_2568192.pth... -[2024-09-01 07:31:59,222][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000575_2355200.pth -[2024-09-01 07:32:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2568192. Throughput: 0: 228.6. Samples: 259736. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:32:02,160][00307] Avg episode reward: [(0, '20.847')] -[2024-09-01 07:32:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2572288. Throughput: 0: 225.4. Samples: 260148. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:32:07,160][00307] Avg episode reward: [(0, '21.446')] -[2024-09-01 07:32:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2576384. Throughput: 0: 215.0. Samples: 261158. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:32:12,159][00307] Avg episode reward: [(0, '20.995')] -[2024-09-01 07:32:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2580480. Throughput: 0: 226.5. Samples: 262782. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:32:17,162][00307] Avg episode reward: [(0, '20.803')] -[2024-09-01 07:32:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2588672. Throughput: 0: 228.9. Samples: 263680. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:32:22,164][00307] Avg episode reward: [(0, '20.794')] -[2024-09-01 07:32:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2588672. Throughput: 0: 224.6. Samples: 264718. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:32:27,157][00307] Avg episode reward: [(0, '20.566')] -[2024-09-01 07:32:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2596864. Throughput: 0: 211.9. Samples: 265758. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:32:32,157][00307] Avg episode reward: [(0, '20.738')] -[2024-09-01 07:32:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2600960. Throughput: 0: 223.6. Samples: 266692. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:32:37,163][00307] Avg episode reward: [(0, '20.760')] -[2024-09-01 07:32:40,848][16881] Updated weights for policy 0, policy_version 636 (0.1237) -[2024-09-01 07:32:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2605056. Throughput: 0: 221.5. Samples: 267792. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:32:42,158][00307] Avg episode reward: [(0, '21.114')] -[2024-09-01 07:32:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2609152. Throughput: 0: 205.6. Samples: 268986. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:32:47,157][00307] Avg episode reward: [(0, '21.494')] -[2024-09-01 07:32:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2613248. Throughput: 0: 214.6. Samples: 269806. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:32:52,164][00307] Avg episode reward: [(0, '21.359')] -[2024-09-01 07:32:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2617344. Throughput: 0: 230.9. Samples: 271550. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:32:57,157][00307] Avg episode reward: [(0, '21.100')] -[2024-09-01 07:33:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2621440. Throughput: 0: 219.1. Samples: 272642. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:33:02,164][00307] Avg episode reward: [(0, '20.853')] -[2024-09-01 07:33:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2625536. Throughput: 0: 207.2. Samples: 273002. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:33:07,160][00307] Avg episode reward: [(0, '21.073')] -[2024-09-01 07:33:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2629632. Throughput: 0: 226.5. Samples: 274910. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:33:12,161][00307] Avg episode reward: [(0, '20.804')] -[2024-09-01 07:33:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2637824. Throughput: 0: 227.0. Samples: 275974. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:33:17,157][00307] Avg episode reward: [(0, '20.974')] -[2024-09-01 07:33:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2637824. Throughput: 0: 221.3. Samples: 276652. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:33:22,162][00307] Avg episode reward: [(0, '20.868')] -[2024-09-01 07:33:26,880][16881] Updated weights for policy 0, policy_version 646 (0.0578) -[2024-09-01 07:33:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2646016. Throughput: 0: 228.9. Samples: 278092. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:33:27,161][00307] Avg episode reward: [(0, '20.200')] -[2024-09-01 07:33:32,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2650112. Throughput: 0: 237.6. Samples: 279676. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:33:32,158][00307] Avg episode reward: [(0, '20.856')] -[2024-09-01 07:33:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2654208. Throughput: 0: 230.0. Samples: 280156. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:33:37,157][00307] Avg episode reward: [(0, '20.460')] -[2024-09-01 07:33:42,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2658304. Throughput: 0: 213.9. Samples: 281176. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:33:42,161][00307] Avg episode reward: [(0, '19.987')] -[2024-09-01 07:33:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2662400. Throughput: 0: 232.0. Samples: 283080. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:33:47,163][00307] Avg episode reward: [(0, '19.118')] -[2024-09-01 07:33:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2666496. Throughput: 0: 238.2. Samples: 283720. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:33:52,159][00307] Avg episode reward: [(0, '18.836')] -[2024-09-01 07:33:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2670592. Throughput: 0: 218.5. Samples: 284742. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:33:57,161][00307] Avg episode reward: [(0, '18.827')] -[2024-09-01 07:33:59,313][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000653_2674688.pth... -[2024-09-01 07:33:59,431][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000602_2465792.pth -[2024-09-01 07:34:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2674688. Throughput: 0: 230.4. Samples: 286342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:34:02,165][00307] Avg episode reward: [(0, '19.525')] -[2024-09-01 07:34:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2678784. Throughput: 0: 230.4. Samples: 287022. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:34:07,164][00307] Avg episode reward: [(0, '19.109')] -[2024-09-01 07:34:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2682880. Throughput: 0: 225.4. Samples: 288234. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:34:12,157][00307] Avg episode reward: [(0, '19.401')] -[2024-09-01 07:34:12,625][16881] Updated weights for policy 0, policy_version 656 (0.0559) -[2024-09-01 07:34:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2686976. Throughput: 0: 216.4. Samples: 289412. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:34:17,156][00307] Avg episode reward: [(0, '19.576')] -[2024-09-01 07:34:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 2695168. Throughput: 0: 221.0. Samples: 290100. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:34:22,156][00307] Avg episode reward: [(0, '19.630')] -[2024-09-01 07:34:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2699264. Throughput: 0: 229.2. Samples: 291492. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:34:27,159][00307] Avg episode reward: [(0, '19.370')] -[2024-09-01 07:34:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2703360. Throughput: 0: 210.8. Samples: 292564. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:34:32,157][00307] Avg episode reward: [(0, '19.170')] -[2024-09-01 07:34:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2707456. Throughput: 0: 215.2. Samples: 293402. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:34:37,157][00307] Avg episode reward: [(0, '18.760')] -[2024-09-01 07:34:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2711552. Throughput: 0: 222.8. Samples: 294770. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:34:42,162][00307] Avg episode reward: [(0, '19.397')] -[2024-09-01 07:34:47,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 2715648. Throughput: 0: 219.2. Samples: 296206. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:34:47,161][00307] Avg episode reward: [(0, '20.004')] -[2024-09-01 07:34:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2719744. Throughput: 0: 210.9. Samples: 296512. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:34:52,158][00307] Avg episode reward: [(0, '19.587')] -[2024-09-01 07:34:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2723840. Throughput: 0: 217.9. Samples: 298038. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:34:57,156][00307] Avg episode reward: [(0, '19.517')] -[2024-09-01 07:34:58,637][16881] Updated weights for policy 0, policy_version 666 (0.1038) -[2024-09-01 07:35:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2727936. Throughput: 0: 221.9. Samples: 299396. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:02,156][00307] Avg episode reward: [(0, '20.009')] -[2024-09-01 07:35:07,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2732032. Throughput: 0: 209.8. Samples: 299540. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:35:07,166][00307] Avg episode reward: [(0, '20.009')] -[2024-09-01 07:35:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2732032. Throughput: 0: 193.6. Samples: 300204. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:35:12,157][00307] Avg episode reward: [(0, '20.046')] -[2024-09-01 07:35:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2736128. Throughput: 0: 199.9. Samples: 301560. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:35:17,161][00307] Avg episode reward: [(0, '19.822')] -[2024-09-01 07:35:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2744320. Throughput: 0: 194.5. Samples: 302154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:22,161][00307] Avg episode reward: [(0, '19.680')] -[2024-09-01 07:35:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 2744320. Throughput: 0: 196.6. Samples: 303618. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:27,160][00307] Avg episode reward: [(0, '20.417')] -[2024-09-01 07:35:32,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 2748416. Throughput: 0: 186.6. Samples: 304604. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:32,161][00307] Avg episode reward: [(0, '20.727')] -[2024-09-01 07:35:37,157][00307] Fps is (10 sec: 1228.9, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2756608. Throughput: 0: 197.2. Samples: 305388. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:37,160][00307] Avg episode reward: [(0, '20.795')] -[2024-09-01 07:35:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2760704. Throughput: 0: 195.3. Samples: 306826. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:42,164][00307] Avg episode reward: [(0, '20.606')] -[2024-09-01 07:35:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2764800. Throughput: 0: 187.9. Samples: 307850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:47,159][00307] Avg episode reward: [(0, '20.538')] -[2024-09-01 07:35:51,609][16881] Updated weights for policy 0, policy_version 676 (0.1578) -[2024-09-01 07:35:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2768896. Throughput: 0: 203.8. Samples: 308710. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:52,158][00307] Avg episode reward: [(0, '20.766')] -[2024-09-01 07:35:54,010][16868] Signal inference workers to stop experience collection... (300 times) -[2024-09-01 07:35:54,052][16881] InferenceWorker_p0-w0: stopping experience collection (300 times) -[2024-09-01 07:35:55,101][16868] Signal inference workers to resume experience collection... (300 times) -[2024-09-01 07:35:55,103][16881] InferenceWorker_p0-w0: resuming experience collection (300 times) -[2024-09-01 07:35:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2772992. Throughput: 0: 215.2. Samples: 309888. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:35:57,157][00307] Avg episode reward: [(0, '20.655')] -[2024-09-01 07:35:59,022][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000678_2777088.pth... -[2024-09-01 07:35:59,133][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000627_2568192.pth -[2024-09-01 07:36:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2777088. Throughput: 0: 219.9. Samples: 311456. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:36:02,162][00307] Avg episode reward: [(0, '20.259')] -[2024-09-01 07:36:07,157][00307] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 2781184. Throughput: 0: 212.9. Samples: 311736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:36:07,165][00307] Avg episode reward: [(0, '20.936')] -[2024-09-01 07:36:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2785280. Throughput: 0: 211.3. Samples: 313126. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:36:12,156][00307] Avg episode reward: [(0, '21.162')] -[2024-09-01 07:36:17,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2789376. Throughput: 0: 226.1. Samples: 314780. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:36:17,164][00307] Avg episode reward: [(0, '21.195')] -[2024-09-01 07:36:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2793472. Throughput: 0: 222.3. Samples: 315392. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:22,157][00307] Avg episode reward: [(0, '20.555')] -[2024-09-01 07:36:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2797568. Throughput: 0: 213.9. Samples: 316452. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:27,157][00307] Avg episode reward: [(0, '21.271')] -[2024-09-01 07:36:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 2805760. Throughput: 0: 223.0. Samples: 317884. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:32,163][00307] Avg episode reward: [(0, '21.791')] -[2024-09-01 07:36:35,947][16868] Saving new best policy, reward=21.791! -[2024-09-01 07:36:35,963][16881] Updated weights for policy 0, policy_version 686 (0.2111) -[2024-09-01 07:36:37,161][00307] Fps is (10 sec: 1227.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2809856. Throughput: 0: 225.5. Samples: 318860. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:37,167][00307] Avg episode reward: [(0, '22.041')] -[2024-09-01 07:36:42,142][16868] Saving new best policy, reward=22.041! -[2024-09-01 07:36:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2813952. Throughput: 0: 221.9. Samples: 319872. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:42,161][00307] Avg episode reward: [(0, '21.913')] -[2024-09-01 07:36:47,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2818048. Throughput: 0: 213.1. Samples: 321046. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:47,157][00307] Avg episode reward: [(0, '21.276')] -[2024-09-01 07:36:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2822144. Throughput: 0: 227.3. Samples: 321964. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:52,163][00307] Avg episode reward: [(0, '21.882')] -[2024-09-01 07:36:57,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2826240. Throughput: 0: 226.3. Samples: 323310. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:36:57,163][00307] Avg episode reward: [(0, '22.331')] -[2024-09-01 07:37:00,790][16868] Saving new best policy, reward=22.331! -[2024-09-01 07:37:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2830336. Throughput: 0: 211.8. Samples: 324310. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:02,160][00307] Avg episode reward: [(0, '22.109')] -[2024-09-01 07:37:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2834432. Throughput: 0: 213.0. Samples: 324976. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:07,162][00307] Avg episode reward: [(0, '22.686')] -[2024-09-01 07:37:09,014][16868] Saving new best policy, reward=22.686! -[2024-09-01 07:37:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2838528. Throughput: 0: 230.3. Samples: 326816. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:12,157][00307] Avg episode reward: [(0, '22.756')] -[2024-09-01 07:37:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2842624. Throughput: 0: 221.6. Samples: 327856. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:17,158][00307] Avg episode reward: [(0, '22.837')] -[2024-09-01 07:37:19,577][16868] Saving new best policy, reward=22.756! -[2024-09-01 07:37:19,756][16868] Saving new best policy, reward=22.837! -[2024-09-01 07:37:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2846720. Throughput: 0: 208.9. Samples: 328260. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:22,162][00307] Avg episode reward: [(0, '22.485')] -[2024-09-01 07:37:23,815][16881] Updated weights for policy 0, policy_version 696 (0.0049) -[2024-09-01 07:37:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2850816. Throughput: 0: 225.4. Samples: 330014. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:37:27,162][00307] Avg episode reward: [(0, '22.919')] -[2024-09-01 07:37:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2854912. Throughput: 0: 225.5. Samples: 331192. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:37:32,159][00307] Avg episode reward: [(0, '22.074')] -[2024-09-01 07:37:32,589][16868] Saving new best policy, reward=22.919! -[2024-09-01 07:37:37,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.3, 300 sec: 860.9). Total num frames: 2859008. Throughput: 0: 217.9. Samples: 331770. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:37,158][00307] Avg episode reward: [(0, '21.982')] -[2024-09-01 07:37:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2863104. Throughput: 0: 221.1. Samples: 333260. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:42,162][00307] Avg episode reward: [(0, '22.465')] -[2024-09-01 07:37:47,154][00307] Fps is (10 sec: 1228.9, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2871296. Throughput: 0: 224.8. Samples: 334424. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:37:47,161][00307] Avg episode reward: [(0, '22.598')] -[2024-09-01 07:37:52,159][00307] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2875392. Throughput: 0: 228.5. Samples: 335260. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:37:52,163][00307] Avg episode reward: [(0, '22.886')] -[2024-09-01 07:37:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2875392. Throughput: 0: 212.9. Samples: 336398. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:37:57,159][00307] Avg episode reward: [(0, '23.246')] -[2024-09-01 07:38:01,212][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000704_2883584.pth... -[2024-09-01 07:38:01,326][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000653_2674688.pth -[2024-09-01 07:38:01,339][16868] Saving new best policy, reward=23.246! -[2024-09-01 07:38:02,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2883584. Throughput: 0: 221.1. Samples: 337804. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:38:02,167][00307] Avg episode reward: [(0, '23.922')] -[2024-09-01 07:38:05,421][16868] Saving new best policy, reward=23.922! -[2024-09-01 07:38:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2887680. Throughput: 0: 228.7. Samples: 338552. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:38:07,157][00307] Avg episode reward: [(0, '23.956')] -[2024-09-01 07:38:11,001][16868] Saving new best policy, reward=23.956! -[2024-09-01 07:38:11,012][16881] Updated weights for policy 0, policy_version 706 (0.0563) -[2024-09-01 07:38:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2891776. Throughput: 0: 212.5. Samples: 339578. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:38:12,167][00307] Avg episode reward: [(0, '24.087')] -[2024-09-01 07:38:16,147][16868] Saving new best policy, reward=24.087! -[2024-09-01 07:38:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2895872. Throughput: 0: 217.8. Samples: 340994. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:38:17,156][00307] Avg episode reward: [(0, '24.483')] -[2024-09-01 07:38:20,055][16868] Saving new best policy, reward=24.483! -[2024-09-01 07:38:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2899968. Throughput: 0: 219.7. Samples: 341656. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:38:22,163][00307] Avg episode reward: [(0, '25.203')] -[2024-09-01 07:38:23,930][16868] Saving new best policy, reward=25.203! -[2024-09-01 07:38:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2904064. Throughput: 0: 220.9. Samples: 343200. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:38:27,164][00307] Avg episode reward: [(0, '25.233')] -[2024-09-01 07:38:30,010][16868] Saving new best policy, reward=25.233! -[2024-09-01 07:38:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2908160. Throughput: 0: 217.5. Samples: 344212. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:38:32,161][00307] Avg episode reward: [(0, '25.647')] -[2024-09-01 07:38:34,764][16868] Saving new best policy, reward=25.647! -[2024-09-01 07:38:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2912256. Throughput: 0: 213.6. Samples: 344872. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:38:37,156][00307] Avg episode reward: [(0, '24.750')] -[2024-09-01 07:38:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2916352. Throughput: 0: 225.3. Samples: 346538. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 07:38:42,162][00307] Avg episode reward: [(0, '24.761')] -[2024-09-01 07:38:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2920448. Throughput: 0: 222.5. Samples: 347816. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 07:38:47,156][00307] Avg episode reward: [(0, '24.476')] -[2024-09-01 07:38:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 860.9). Total num frames: 2924544. Throughput: 0: 210.4. Samples: 348020. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:38:52,160][00307] Avg episode reward: [(0, '24.588')] -[2024-09-01 07:38:56,793][16881] Updated weights for policy 0, policy_version 716 (0.1215) -[2024-09-01 07:38:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 2932736. Throughput: 0: 227.9. Samples: 349832. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:38:57,157][00307] Avg episode reward: [(0, '24.421')] -[2024-09-01 07:39:02,155][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2936832. Throughput: 0: 224.2. Samples: 351084. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:39:02,159][00307] Avg episode reward: [(0, '24.239')] -[2024-09-01 07:39:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2940928. Throughput: 0: 225.6. Samples: 351808. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:39:07,162][00307] Avg episode reward: [(0, '23.779')] -[2024-09-01 07:39:12,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 2945024. Throughput: 0: 214.8. Samples: 352866. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:39:12,160][00307] Avg episode reward: [(0, '23.938')] -[2024-09-01 07:39:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2949120. Throughput: 0: 232.7. Samples: 354682. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:39:17,160][00307] Avg episode reward: [(0, '24.132')] -[2024-09-01 07:39:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2953216. Throughput: 0: 224.9. Samples: 354992. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:39:22,166][00307] Avg episode reward: [(0, '24.474')] -[2024-09-01 07:39:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2957312. Throughput: 0: 210.9. Samples: 356028. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:39:27,164][00307] Avg episode reward: [(0, '24.685')] -[2024-09-01 07:39:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2961408. Throughput: 0: 221.5. Samples: 357782. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:39:32,157][00307] Avg episode reward: [(0, '25.072')] -[2024-09-01 07:39:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2965504. Throughput: 0: 234.9. Samples: 358590. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:39:37,157][00307] Avg episode reward: [(0, '24.951')] -[2024-09-01 07:39:42,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 860.8). Total num frames: 2969600. Throughput: 0: 214.6. Samples: 359492. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:39:42,160][00307] Avg episode reward: [(0, '24.505')] -[2024-09-01 07:39:43,896][16881] Updated weights for policy 0, policy_version 726 (0.1270) -[2024-09-01 07:39:46,243][16868] Signal inference workers to stop experience collection... (350 times) -[2024-09-01 07:39:46,307][16881] InferenceWorker_p0-w0: stopping experience collection (350 times) -[2024-09-01 07:39:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 2973696. Throughput: 0: 219.4. Samples: 360958. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:39:47,162][00307] Avg episode reward: [(0, '24.031')] -[2024-09-01 07:39:47,785][16868] Signal inference workers to resume experience collection... (350 times) -[2024-09-01 07:39:47,787][16881] InferenceWorker_p0-w0: resuming experience collection (350 times) -[2024-09-01 07:39:52,154][00307] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 2981888. Throughput: 0: 220.1. Samples: 361714. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:39:52,157][00307] Avg episode reward: [(0, '23.843')] -[2024-09-01 07:39:57,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 2985984. Throughput: 0: 225.3. Samples: 363004. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:39:57,162][00307] Avg episode reward: [(0, '23.114')] -[2024-09-01 07:40:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 860.9). Total num frames: 2985984. Throughput: 0: 207.3. Samples: 364010. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:02,163][00307] Avg episode reward: [(0, '23.444')] -[2024-09-01 07:40:02,313][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000730_2990080.pth... -[2024-09-01 07:40:02,431][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000678_2777088.pth -[2024-09-01 07:40:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2994176. Throughput: 0: 224.2. Samples: 365080. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:07,159][00307] Avg episode reward: [(0, '22.546')] -[2024-09-01 07:40:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 2998272. Throughput: 0: 230.6. Samples: 366406. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:12,157][00307] Avg episode reward: [(0, '22.568')] -[2024-09-01 07:40:17,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 3002368. Throughput: 0: 215.6. Samples: 367484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:17,160][00307] Avg episode reward: [(0, '21.991')] -[2024-09-01 07:40:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3006464. Throughput: 0: 211.7. Samples: 368118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:22,156][00307] Avg episode reward: [(0, '22.201')] -[2024-09-01 07:40:27,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3010560. Throughput: 0: 226.4. Samples: 369680. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:27,161][00307] Avg episode reward: [(0, '22.196')] -[2024-09-01 07:40:28,516][16881] Updated weights for policy 0, policy_version 736 (0.1023) -[2024-09-01 07:40:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3014656. Throughput: 0: 225.5. Samples: 371104. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:32,160][00307] Avg episode reward: [(0, '22.172')] -[2024-09-01 07:40:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3018752. Throughput: 0: 214.4. Samples: 371360. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:37,156][00307] Avg episode reward: [(0, '22.195')] -[2024-09-01 07:40:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3022848. Throughput: 0: 217.8. Samples: 372806. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:42,156][00307] Avg episode reward: [(0, '22.153')] -[2024-09-01 07:40:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3031040. Throughput: 0: 214.5. Samples: 373664. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:47,156][00307] Avg episode reward: [(0, '22.278')] -[2024-09-01 07:40:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3031040. Throughput: 0: 217.5. Samples: 374866. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:52,157][00307] Avg episode reward: [(0, '22.376')] -[2024-09-01 07:40:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3035136. Throughput: 0: 219.0. Samples: 376260. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:40:57,157][00307] Avg episode reward: [(0, '22.697')] -[2024-09-01 07:41:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3043328. Throughput: 0: 223.4. Samples: 377536. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:41:02,162][00307] Avg episode reward: [(0, '21.770')] -[2024-09-01 07:41:07,157][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3047424. Throughput: 0: 226.4. Samples: 378308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:41:07,161][00307] Avg episode reward: [(0, '21.599')] -[2024-09-01 07:41:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3051520. Throughput: 0: 215.6. Samples: 379382. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:41:12,158][00307] Avg episode reward: [(0, '21.982')] -[2024-09-01 07:41:16,380][16881] Updated weights for policy 0, policy_version 746 (0.1708) -[2024-09-01 07:41:17,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3055616. Throughput: 0: 210.6. Samples: 380580. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:41:17,159][00307] Avg episode reward: [(0, '21.147')] -[2024-09-01 07:41:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3059712. Throughput: 0: 223.4. Samples: 381414. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:41:22,161][00307] Avg episode reward: [(0, '21.303')] -[2024-09-01 07:41:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3063808. Throughput: 0: 223.6. Samples: 382868. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:41:27,160][00307] Avg episode reward: [(0, '21.522')] -[2024-09-01 07:41:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 3067904. Throughput: 0: 229.6. Samples: 383994. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:41:32,163][00307] Avg episode reward: [(0, '20.961')] -[2024-09-01 07:41:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3072000. Throughput: 0: 218.8. Samples: 384712. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:41:37,156][00307] Avg episode reward: [(0, '20.751')] -[2024-09-01 07:41:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3076096. Throughput: 0: 225.7. Samples: 386418. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:41:42,159][00307] Avg episode reward: [(0, '21.546')] -[2024-09-01 07:41:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3080192. Throughput: 0: 221.3. Samples: 387494. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:41:47,157][00307] Avg episode reward: [(0, '21.943')] -[2024-09-01 07:41:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3084288. Throughput: 0: 216.1. Samples: 388034. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:41:52,162][00307] Avg episode reward: [(0, '22.276')] -[2024-09-01 07:41:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3092480. Throughput: 0: 225.3. Samples: 389522. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:41:57,158][00307] Avg episode reward: [(0, '22.176')] -[2024-09-01 07:42:00,862][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000756_3096576.pth... -[2024-09-01 07:42:00,881][16881] Updated weights for policy 0, policy_version 756 (0.0556) -[2024-09-01 07:42:00,985][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000704_2883584.pth -[2024-09-01 07:42:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3096576. Throughput: 0: 226.7. Samples: 390782. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:42:02,158][00307] Avg episode reward: [(0, '22.422')] -[2024-09-01 07:42:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3100672. Throughput: 0: 223.2. Samples: 391460. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:42:07,156][00307] Avg episode reward: [(0, '22.425')] -[2024-09-01 07:42:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3104768. Throughput: 0: 216.5. Samples: 392610. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:42:12,161][00307] Avg episode reward: [(0, '22.361')] -[2024-09-01 07:42:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3108864. Throughput: 0: 229.9. Samples: 394338. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:42:17,158][00307] Avg episode reward: [(0, '22.619')] -[2024-09-01 07:42:22,161][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3112960. Throughput: 0: 222.3. Samples: 394716. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:42:22,164][00307] Avg episode reward: [(0, '22.542')] -[2024-09-01 07:42:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3117056. Throughput: 0: 210.5. Samples: 395890. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:42:27,156][00307] Avg episode reward: [(0, '22.542')] -[2024-09-01 07:42:32,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3121152. Throughput: 0: 227.0. Samples: 397710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:42:32,164][00307] Avg episode reward: [(0, '23.862')] -[2024-09-01 07:42:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3125248. Throughput: 0: 230.7. Samples: 398414. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:42:37,156][00307] Avg episode reward: [(0, '23.837')] -[2024-09-01 07:42:42,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 3129344. Throughput: 0: 218.0. Samples: 399334. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:42:42,160][00307] Avg episode reward: [(0, '23.547')] -[2024-09-01 07:42:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 3133440. Throughput: 0: 222.8. Samples: 400810. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:42:47,157][00307] Avg episode reward: [(0, '23.582')] -[2024-09-01 07:42:48,382][16881] Updated weights for policy 0, policy_version 766 (0.1589) -[2024-09-01 07:42:52,154][00307] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3141632. Throughput: 0: 223.2. Samples: 401506. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:42:52,158][00307] Avg episode reward: [(0, '23.478')] -[2024-09-01 07:42:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3141632. Throughput: 0: 229.2. Samples: 402926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:42:57,157][00307] Avg episode reward: [(0, '23.270')] -[2024-09-01 07:43:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3145728. Throughput: 0: 214.2. Samples: 403978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:43:02,164][00307] Avg episode reward: [(0, '23.446')] -[2024-09-01 07:43:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3153920. Throughput: 0: 222.4. Samples: 404724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:07,165][00307] Avg episode reward: [(0, '24.034')] -[2024-09-01 07:43:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3158016. Throughput: 0: 229.9. Samples: 406234. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:12,161][00307] Avg episode reward: [(0, '24.167')] -[2024-09-01 07:43:17,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3162112. Throughput: 0: 211.5. Samples: 407230. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:43:17,167][00307] Avg episode reward: [(0, '24.349')] -[2024-09-01 07:43:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 3166208. Throughput: 0: 214.5. Samples: 408066. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:43:22,156][00307] Avg episode reward: [(0, '23.819')] -[2024-09-01 07:43:27,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3170304. Throughput: 0: 227.6. Samples: 409576. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:27,157][00307] Avg episode reward: [(0, '23.621')] -[2024-09-01 07:43:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3174400. Throughput: 0: 224.9. Samples: 410932. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:32,167][00307] Avg episode reward: [(0, '23.792')] -[2024-09-01 07:43:34,743][16881] Updated weights for policy 0, policy_version 776 (0.1071) -[2024-09-01 07:43:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3178496. Throughput: 0: 216.5. Samples: 411248. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:43:37,158][00307] Avg episode reward: [(0, '24.587')] -[2024-09-01 07:43:38,066][16868] Signal inference workers to stop experience collection... (400 times) -[2024-09-01 07:43:38,129][16881] InferenceWorker_p0-w0: stopping experience collection (400 times) -[2024-09-01 07:43:39,396][16868] Signal inference workers to resume experience collection... (400 times) -[2024-09-01 07:43:39,398][16881] InferenceWorker_p0-w0: resuming experience collection (400 times) -[2024-09-01 07:43:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3182592. Throughput: 0: 220.4. Samples: 412846. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:43:42,162][00307] Avg episode reward: [(0, '24.262')] -[2024-09-01 07:43:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3186688. Throughput: 0: 227.2. Samples: 414202. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:47,156][00307] Avg episode reward: [(0, '23.687')] -[2024-09-01 07:43:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3190784. Throughput: 0: 224.1. Samples: 414808. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:52,156][00307] Avg episode reward: [(0, '23.899')] -[2024-09-01 07:43:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3194880. Throughput: 0: 219.6. Samples: 416116. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:43:57,156][00307] Avg episode reward: [(0, '22.714')] -[2024-09-01 07:44:01,606][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000782_3203072.pth... -[2024-09-01 07:44:01,718][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000730_2990080.pth -[2024-09-01 07:44:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3203072. Throughput: 0: 225.8. Samples: 417390. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:02,161][00307] Avg episode reward: [(0, '22.703')] -[2024-09-01 07:44:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3207168. Throughput: 0: 226.0. Samples: 418238. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:07,160][00307] Avg episode reward: [(0, '22.976')] -[2024-09-01 07:44:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3207168. Throughput: 0: 214.4. Samples: 419226. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:12,156][00307] Avg episode reward: [(0, '22.956')] -[2024-09-01 07:44:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3215360. Throughput: 0: 214.4. Samples: 420582. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:44:17,157][00307] Avg episode reward: [(0, '22.943')] -[2024-09-01 07:44:20,173][16881] Updated weights for policy 0, policy_version 786 (0.0555) -[2024-09-01 07:44:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3219456. Throughput: 0: 223.8. Samples: 421320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:44:22,162][00307] Avg episode reward: [(0, '21.904')] -[2024-09-01 07:44:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3223552. Throughput: 0: 216.7. Samples: 422596. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:44:27,157][00307] Avg episode reward: [(0, '21.898')] -[2024-09-01 07:44:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3227648. Throughput: 0: 209.9. Samples: 423646. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:44:32,157][00307] Avg episode reward: [(0, '21.467')] -[2024-09-01 07:44:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3231744. Throughput: 0: 215.6. Samples: 424512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:44:37,161][00307] Avg episode reward: [(0, '21.738')] -[2024-09-01 07:44:42,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3235840. Throughput: 0: 221.0. Samples: 426060. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:42,162][00307] Avg episode reward: [(0, '22.429')] -[2024-09-01 07:44:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3239936. Throughput: 0: 217.3. Samples: 427170. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:47,163][00307] Avg episode reward: [(0, '22.632')] -[2024-09-01 07:44:52,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3244032. Throughput: 0: 208.5. Samples: 427622. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:52,165][00307] Avg episode reward: [(0, '23.234')] -[2024-09-01 07:44:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3248128. Throughput: 0: 230.8. Samples: 429610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:44:57,163][00307] Avg episode reward: [(0, '23.916')] -[2024-09-01 07:45:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3252224. Throughput: 0: 223.2. Samples: 430628. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:45:02,157][00307] Avg episode reward: [(0, '24.082')] -[2024-09-01 07:45:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3256320. Throughput: 0: 218.4. Samples: 431146. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:45:07,166][00307] Avg episode reward: [(0, '24.140')] -[2024-09-01 07:45:08,260][16881] Updated weights for policy 0, policy_version 796 (0.2083) -[2024-09-01 07:45:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3264512. Throughput: 0: 223.5. Samples: 432654. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:45:12,164][00307] Avg episode reward: [(0, '24.372')] -[2024-09-01 07:45:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3268608. Throughput: 0: 232.2. Samples: 434094. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:45:17,157][00307] Avg episode reward: [(0, '24.411')] -[2024-09-01 07:45:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3272704. Throughput: 0: 227.6. Samples: 434752. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:45:22,156][00307] Avg episode reward: [(0, '25.283')] -[2024-09-01 07:45:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3276800. Throughput: 0: 216.0. Samples: 435778. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:45:27,157][00307] Avg episode reward: [(0, '25.616')] -[2024-09-01 07:45:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3280896. Throughput: 0: 228.1. Samples: 437434. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:45:32,157][00307] Avg episode reward: [(0, '24.922')] -[2024-09-01 07:45:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3284992. Throughput: 0: 230.8. Samples: 438006. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:45:37,163][00307] Avg episode reward: [(0, '24.749')] -[2024-09-01 07:45:42,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3289088. Throughput: 0: 209.1. Samples: 439020. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:45:42,162][00307] Avg episode reward: [(0, '24.729')] -[2024-09-01 07:45:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3293184. Throughput: 0: 223.2. Samples: 440674. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:45:47,160][00307] Avg episode reward: [(0, '24.682')] -[2024-09-01 07:45:52,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3297280. Throughput: 0: 225.3. Samples: 441284. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:45:52,157][00307] Avg episode reward: [(0, '23.968')] -[2024-09-01 07:45:53,358][16881] Updated weights for policy 0, policy_version 806 (0.1822) -[2024-09-01 07:45:57,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 3301376. Throughput: 0: 218.7. Samples: 442496. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:45:57,161][00307] Avg episode reward: [(0, '24.235')] -[2024-09-01 07:45:58,612][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000807_3305472.pth... -[2024-09-01 07:45:58,711][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000756_3096576.pth -[2024-09-01 07:46:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3305472. Throughput: 0: 214.9. Samples: 443766. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:46:02,157][00307] Avg episode reward: [(0, '24.608')] -[2024-09-01 07:46:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3309568. Throughput: 0: 215.5. Samples: 444450. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:46:07,156][00307] Avg episode reward: [(0, '26.202')] -[2024-09-01 07:46:11,673][16868] Saving new best policy, reward=26.202! -[2024-09-01 07:46:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3317760. Throughput: 0: 227.6. Samples: 446020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:46:12,157][00307] Avg episode reward: [(0, '26.285')] -[2024-09-01 07:46:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3317760. Throughput: 0: 214.2. Samples: 447074. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:46:17,157][00307] Avg episode reward: [(0, '26.143')] -[2024-09-01 07:46:17,923][16868] Saving new best policy, reward=26.285! -[2024-09-01 07:46:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3321856. Throughput: 0: 215.9. Samples: 447720. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:46:22,158][00307] Avg episode reward: [(0, '26.063')] -[2024-09-01 07:46:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3330048. Throughput: 0: 224.3. Samples: 449114. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:46:27,157][00307] Avg episode reward: [(0, '25.979')] -[2024-09-01 07:46:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3334144. Throughput: 0: 219.7. Samples: 450560. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:46:32,157][00307] Avg episode reward: [(0, '26.826')] -[2024-09-01 07:46:36,238][16868] Saving new best policy, reward=26.826! -[2024-09-01 07:46:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3338240. Throughput: 0: 219.2. Samples: 451146. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:46:37,158][00307] Avg episode reward: [(0, '26.700')] -[2024-09-01 07:46:40,743][16881] Updated weights for policy 0, policy_version 816 (0.0546) -[2024-09-01 07:46:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3342336. Throughput: 0: 217.8. Samples: 452298. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:46:42,164][00307] Avg episode reward: [(0, '26.510')] -[2024-09-01 07:46:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3346432. Throughput: 0: 232.1. Samples: 454210. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:46:47,166][00307] Avg episode reward: [(0, '25.817')] -[2024-09-01 07:46:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3350528. Throughput: 0: 222.2. Samples: 454450. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:46:52,159][00307] Avg episode reward: [(0, '25.507')] -[2024-09-01 07:46:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3354624. Throughput: 0: 213.1. Samples: 455608. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:46:57,157][00307] Avg episode reward: [(0, '25.074')] -[2024-09-01 07:47:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3358720. Throughput: 0: 227.5. Samples: 457310. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:47:02,159][00307] Avg episode reward: [(0, '24.813')] -[2024-09-01 07:47:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3362816. Throughput: 0: 227.5. Samples: 457958. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:47:07,158][00307] Avg episode reward: [(0, '24.402')] -[2024-09-01 07:47:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3366912. Throughput: 0: 222.9. Samples: 459144. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:47:12,158][00307] Avg episode reward: [(0, '24.292')] -[2024-09-01 07:47:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 3371008. Throughput: 0: 218.5. Samples: 460394. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:47:17,163][00307] Avg episode reward: [(0, '22.926')] -[2024-09-01 07:47:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3379200. Throughput: 0: 227.5. Samples: 461382. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:47:22,163][00307] Avg episode reward: [(0, '22.713')] -[2024-09-01 07:47:26,181][16881] Updated weights for policy 0, policy_version 826 (0.1077) -[2024-09-01 07:47:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3383296. Throughput: 0: 224.5. Samples: 462400. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:47:27,161][00307] Avg episode reward: [(0, '22.770')] -[2024-09-01 07:47:30,257][16868] Signal inference workers to stop experience collection... (450 times) -[2024-09-01 07:47:30,367][16881] InferenceWorker_p0-w0: stopping experience collection (450 times) -[2024-09-01 07:47:31,920][16868] Signal inference workers to resume experience collection... (450 times) -[2024-09-01 07:47:31,923][16881] InferenceWorker_p0-w0: resuming experience collection (450 times) -[2024-09-01 07:47:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3387392. Throughput: 0: 197.2. Samples: 463084. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:47:32,157][00307] Avg episode reward: [(0, '22.694')] -[2024-09-01 07:47:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3391488. Throughput: 0: 221.4. Samples: 464412. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:47:37,157][00307] Avg episode reward: [(0, '22.496')] -[2024-09-01 07:47:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3395584. Throughput: 0: 226.0. Samples: 465778. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:47:42,157][00307] Avg episode reward: [(0, '22.564')] -[2024-09-01 07:47:47,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3399680. Throughput: 0: 214.4. Samples: 466956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:47:47,162][00307] Avg episode reward: [(0, '22.948')] -[2024-09-01 07:47:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3403776. Throughput: 0: 212.7. Samples: 467530. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:47:52,157][00307] Avg episode reward: [(0, '23.108')] -[2024-09-01 07:47:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3407872. Throughput: 0: 220.7. Samples: 469074. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:47:57,162][00307] Avg episode reward: [(0, '22.520')] -[2024-09-01 07:48:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3411968. Throughput: 0: 227.2. Samples: 470620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:48:02,156][00307] Avg episode reward: [(0, '22.712')] -[2024-09-01 07:48:03,839][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000834_3416064.pth... -[2024-09-01 07:48:03,983][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000782_3203072.pth -[2024-09-01 07:48:07,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 3416064. Throughput: 0: 211.0. Samples: 470878. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:48:07,161][00307] Avg episode reward: [(0, '23.034')] -[2024-09-01 07:48:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 3420160. Throughput: 0: 223.7. Samples: 472468. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:48:12,162][00307] Avg episode reward: [(0, '23.296')] -[2024-09-01 07:48:12,858][16881] Updated weights for policy 0, policy_version 836 (0.1610) -[2024-09-01 07:48:17,154][00307] Fps is (10 sec: 1229.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3428352. Throughput: 0: 222.4. Samples: 473092. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:48:17,163][00307] Avg episode reward: [(0, '23.139')] -[2024-09-01 07:48:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3428352. Throughput: 0: 220.2. Samples: 474322. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:48:22,157][00307] Avg episode reward: [(0, '22.348')] -[2024-09-01 07:48:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3432448. Throughput: 0: 217.1. Samples: 475548. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:48:27,163][00307] Avg episode reward: [(0, '21.409')] -[2024-09-01 07:48:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3440640. Throughput: 0: 223.6. Samples: 477018. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:48:32,157][00307] Avg episode reward: [(0, '21.859')] -[2024-09-01 07:48:37,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3444736. Throughput: 0: 227.4. Samples: 477762. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:48:37,168][00307] Avg episode reward: [(0, '22.755')] -[2024-09-01 07:48:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3448832. Throughput: 0: 217.0. Samples: 478838. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:48:42,157][00307] Avg episode reward: [(0, '23.276')] -[2024-09-01 07:48:47,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3452928. Throughput: 0: 210.7. Samples: 480102. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:48:47,162][00307] Avg episode reward: [(0, '23.020')] -[2024-09-01 07:48:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3457024. Throughput: 0: 223.5. Samples: 480934. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:48:52,157][00307] Avg episode reward: [(0, '22.404')] -[2024-09-01 07:48:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3461120. Throughput: 0: 221.2. Samples: 482424. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:48:57,161][00307] Avg episode reward: [(0, '22.271')] -[2024-09-01 07:48:59,777][16881] Updated weights for policy 0, policy_version 846 (0.1092) -[2024-09-01 07:49:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3465216. Throughput: 0: 232.4. Samples: 483548. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:49:02,156][00307] Avg episode reward: [(0, '22.494')] -[2024-09-01 07:49:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3469312. Throughput: 0: 219.5. Samples: 484198. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:49:07,162][00307] Avg episode reward: [(0, '22.587')] -[2024-09-01 07:49:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3473408. Throughput: 0: 232.2. Samples: 485996. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:12,157][00307] Avg episode reward: [(0, '22.983')] -[2024-09-01 07:49:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3477504. Throughput: 0: 219.3. Samples: 486888. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:17,157][00307] Avg episode reward: [(0, '23.471')] -[2024-09-01 07:49:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3481600. Throughput: 0: 216.1. Samples: 487488. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:49:22,165][00307] Avg episode reward: [(0, '23.916')] -[2024-09-01 07:49:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3489792. Throughput: 0: 227.4. Samples: 489072. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:27,157][00307] Avg episode reward: [(0, '23.543')] -[2024-09-01 07:49:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3493888. Throughput: 0: 228.2. Samples: 490372. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:32,157][00307] Avg episode reward: [(0, '23.875')] -[2024-09-01 07:49:37,157][00307] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3497984. Throughput: 0: 224.3. Samples: 491028. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:49:37,172][00307] Avg episode reward: [(0, '24.206')] -[2024-09-01 07:49:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3502080. Throughput: 0: 216.1. Samples: 492150. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:49:42,158][00307] Avg episode reward: [(0, '24.155')] -[2024-09-01 07:49:45,099][16881] Updated weights for policy 0, policy_version 856 (0.1729) -[2024-09-01 07:49:47,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3506176. Throughput: 0: 231.9. Samples: 493984. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:47,157][00307] Avg episode reward: [(0, '23.557')] -[2024-09-01 07:49:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3510272. Throughput: 0: 225.2. Samples: 494330. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:52,156][00307] Avg episode reward: [(0, '23.808')] -[2024-09-01 07:49:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3514368. Throughput: 0: 210.5. Samples: 495468. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:49:57,157][00307] Avg episode reward: [(0, '24.088')] -[2024-09-01 07:49:59,360][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000859_3518464.pth... -[2024-09-01 07:49:59,480][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000807_3305472.pth -[2024-09-01 07:50:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3518464. Throughput: 0: 231.6. Samples: 497312. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:02,159][00307] Avg episode reward: [(0, '23.297')] -[2024-09-01 07:50:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3522560. Throughput: 0: 232.2. Samples: 497938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:07,156][00307] Avg episode reward: [(0, '22.751')] -[2024-09-01 07:50:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3526656. Throughput: 0: 218.7. Samples: 498912. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:12,162][00307] Avg episode reward: [(0, '22.420')] -[2024-09-01 07:50:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3530752. Throughput: 0: 222.5. Samples: 500386. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:50:17,160][00307] Avg episode reward: [(0, '22.420')] -[2024-09-01 07:50:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3538944. Throughput: 0: 222.9. Samples: 501058. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:50:22,163][00307] Avg episode reward: [(0, '22.846')] -[2024-09-01 07:50:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3538944. Throughput: 0: 227.9. Samples: 502404. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:50:27,157][00307] Avg episode reward: [(0, '22.553')] -[2024-09-01 07:50:32,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3543040. Throughput: 0: 209.4. Samples: 503408. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:50:32,158][00307] Avg episode reward: [(0, '22.030')] -[2024-09-01 07:50:32,359][16881] Updated weights for policy 0, policy_version 866 (0.1544) -[2024-09-01 07:50:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3551232. Throughput: 0: 224.1. Samples: 504414. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 07:50:37,157][00307] Avg episode reward: [(0, '22.160')] -[2024-09-01 07:50:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3555328. Throughput: 0: 222.6. Samples: 505484. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:42,157][00307] Avg episode reward: [(0, '22.573')] -[2024-09-01 07:50:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3559424. Throughput: 0: 208.2. Samples: 506682. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:47,162][00307] Avg episode reward: [(0, '22.889')] -[2024-09-01 07:50:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3563520. Throughput: 0: 212.5. Samples: 507502. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:52,157][00307] Avg episode reward: [(0, '23.977')] -[2024-09-01 07:50:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3567616. Throughput: 0: 225.2. Samples: 509044. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:50:57,159][00307] Avg episode reward: [(0, '24.614')] -[2024-09-01 07:51:02,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3571712. Throughput: 0: 222.9. Samples: 510416. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:02,162][00307] Avg episode reward: [(0, '23.902')] -[2024-09-01 07:51:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3575808. Throughput: 0: 213.7. Samples: 510676. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:07,162][00307] Avg episode reward: [(0, '23.722')] -[2024-09-01 07:51:12,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3579904. Throughput: 0: 219.6. Samples: 512286. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:12,166][00307] Avg episode reward: [(0, '23.701')] -[2024-09-01 07:51:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3588096. Throughput: 0: 227.6. Samples: 513648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:17,166][00307] Avg episode reward: [(0, '23.746')] -[2024-09-01 07:51:17,710][16881] Updated weights for policy 0, policy_version 876 (0.1707) -[2024-09-01 07:51:21,651][16868] Signal inference workers to stop experience collection... (500 times) -[2024-09-01 07:51:21,714][16881] InferenceWorker_p0-w0: stopping experience collection (500 times) -[2024-09-01 07:51:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3588096. Throughput: 0: 218.4. Samples: 514244. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:22,161][00307] Avg episode reward: [(0, '23.498')] -[2024-09-01 07:51:22,943][16868] Signal inference workers to resume experience collection... (500 times) -[2024-09-01 07:51:22,944][16881] InferenceWorker_p0-w0: resuming experience collection (500 times) -[2024-09-01 07:51:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3592192. Throughput: 0: 222.0. Samples: 515476. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:27,164][00307] Avg episode reward: [(0, '23.698')] -[2024-09-01 07:51:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3600384. Throughput: 0: 222.5. Samples: 516694. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:32,163][00307] Avg episode reward: [(0, '24.818')] -[2024-09-01 07:51:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3604480. Throughput: 0: 226.4. Samples: 517688. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:51:37,158][00307] Avg episode reward: [(0, '24.743')] -[2024-09-01 07:51:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3608576. Throughput: 0: 214.3. Samples: 518688. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:51:42,160][00307] Avg episode reward: [(0, '24.706')] -[2024-09-01 07:51:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3612672. Throughput: 0: 214.3. Samples: 520060. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:47,162][00307] Avg episode reward: [(0, '24.971')] -[2024-09-01 07:51:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3616768. Throughput: 0: 222.0. Samples: 520668. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 07:51:52,157][00307] Avg episode reward: [(0, '25.373')] -[2024-09-01 07:51:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3620864. Throughput: 0: 219.2. Samples: 522150. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:51:57,160][00307] Avg episode reward: [(0, '25.378')] -[2024-09-01 07:51:59,849][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000885_3624960.pth... -[2024-09-01 07:52:00,009][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000834_3416064.pth -[2024-09-01 07:52:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3624960. Throughput: 0: 215.2. Samples: 523330. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:52:02,157][00307] Avg episode reward: [(0, '24.600')] -[2024-09-01 07:52:04,866][16881] Updated weights for policy 0, policy_version 886 (0.1585) -[2024-09-01 07:52:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3629056. Throughput: 0: 212.0. Samples: 523782. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:52:07,163][00307] Avg episode reward: [(0, '25.479')] -[2024-09-01 07:52:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3633152. Throughput: 0: 226.2. Samples: 525654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:52:12,157][00307] Avg episode reward: [(0, '25.518')] -[2024-09-01 07:52:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3637248. Throughput: 0: 223.6. Samples: 526754. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:52:17,159][00307] Avg episode reward: [(0, '25.513')] -[2024-09-01 07:52:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3641344. Throughput: 0: 213.0. Samples: 527274. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:52:22,165][00307] Avg episode reward: [(0, '26.150')] -[2024-09-01 07:52:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3649536. Throughput: 0: 226.8. Samples: 528892. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:52:27,157][00307] Avg episode reward: [(0, '26.293')] -[2024-09-01 07:52:32,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3653632. Throughput: 0: 223.5. Samples: 530120. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:52:32,161][00307] Avg episode reward: [(0, '26.263')] -[2024-09-01 07:52:37,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3657728. Throughput: 0: 224.7. Samples: 530780. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 07:52:37,158][00307] Avg episode reward: [(0, '26.155')] -[2024-09-01 07:52:42,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3661824. Throughput: 0: 217.6. Samples: 531944. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:52:42,157][00307] Avg episode reward: [(0, '25.701')] -[2024-09-01 07:52:47,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3665920. Throughput: 0: 233.9. Samples: 533854. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:52:47,164][00307] Avg episode reward: [(0, '24.761')] -[2024-09-01 07:52:49,736][16881] Updated weights for policy 0, policy_version 896 (0.0547) -[2024-09-01 07:52:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3670016. Throughput: 0: 228.8. Samples: 534078. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:52:52,157][00307] Avg episode reward: [(0, '24.610')] -[2024-09-01 07:52:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3674112. Throughput: 0: 213.6. Samples: 535266. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:52:57,163][00307] Avg episode reward: [(0, '23.996')] -[2024-09-01 07:53:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3678208. Throughput: 0: 227.0. Samples: 536970. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:53:02,161][00307] Avg episode reward: [(0, '24.336')] -[2024-09-01 07:53:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3682304. Throughput: 0: 228.6. Samples: 537562. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:53:07,157][00307] Avg episode reward: [(0, '24.850')] -[2024-09-01 07:53:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3686400. Throughput: 0: 216.9. Samples: 538652. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:53:12,161][00307] Avg episode reward: [(0, '25.205')] -[2024-09-01 07:53:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3690496. Throughput: 0: 219.3. Samples: 539990. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:53:17,162][00307] Avg episode reward: [(0, '24.787')] -[2024-09-01 07:53:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3698688. Throughput: 0: 225.6. Samples: 540934. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:53:22,158][00307] Avg episode reward: [(0, '24.867')] -[2024-09-01 07:53:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3698688. Throughput: 0: 225.6. Samples: 542098. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:53:27,160][00307] Avg episode reward: [(0, '24.400')] -[2024-09-01 07:53:32,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3702784. Throughput: 0: 206.1. Samples: 543130. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:53:32,165][00307] Avg episode reward: [(0, '24.080')] -[2024-09-01 07:53:36,296][16881] Updated weights for policy 0, policy_version 906 (0.1582) -[2024-09-01 07:53:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3710976. Throughput: 0: 221.6. Samples: 544052. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:53:37,163][00307] Avg episode reward: [(0, '24.213')] -[2024-09-01 07:53:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3715072. Throughput: 0: 225.5. Samples: 545414. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:53:42,157][00307] Avg episode reward: [(0, '23.507')] -[2024-09-01 07:53:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3719168. Throughput: 0: 210.2. Samples: 546428. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:53:47,158][00307] Avg episode reward: [(0, '23.382')] -[2024-09-01 07:53:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3723264. Throughput: 0: 214.0. Samples: 547194. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:53:52,158][00307] Avg episode reward: [(0, '23.956')] -[2024-09-01 07:53:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3727360. Throughput: 0: 219.7. Samples: 548540. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:53:57,157][00307] Avg episode reward: [(0, '23.994')] -[2024-09-01 07:53:58,665][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000911_3731456.pth... -[2024-09-01 07:53:58,829][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000859_3518464.pth -[2024-09-01 07:54:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3731456. Throughput: 0: 223.2. Samples: 550032. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:54:02,163][00307] Avg episode reward: [(0, '24.208')] -[2024-09-01 07:54:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3735552. Throughput: 0: 208.1. Samples: 550300. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:54:07,157][00307] Avg episode reward: [(0, '24.067')] -[2024-09-01 07:54:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3739648. Throughput: 0: 219.1. Samples: 551956. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:54:12,168][00307] Avg episode reward: [(0, '23.776')] -[2024-09-01 07:54:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3743744. Throughput: 0: 228.1. Samples: 553394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:17,157][00307] Avg episode reward: [(0, '23.504')] -[2024-09-01 07:54:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3747840. Throughput: 0: 219.1. Samples: 553910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:22,156][00307] Avg episode reward: [(0, '23.612')] -[2024-09-01 07:54:24,366][16881] Updated weights for policy 0, policy_version 916 (0.2118) -[2024-09-01 07:54:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3751936. Throughput: 0: 214.8. Samples: 555080. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:27,164][00307] Avg episode reward: [(0, '23.434')] -[2024-09-01 07:54:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3760128. Throughput: 0: 226.7. Samples: 556630. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:32,157][00307] Avg episode reward: [(0, '23.453')] -[2024-09-01 07:54:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3764224. Throughput: 0: 228.0. Samples: 557452. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:37,157][00307] Avg episode reward: [(0, '24.045')] -[2024-09-01 07:54:42,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3764224. Throughput: 0: 220.8. Samples: 558474. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:42,157][00307] Avg episode reward: [(0, '23.239')] -[2024-09-01 07:54:47,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3772416. Throughput: 0: 218.0. Samples: 559844. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:47,164][00307] Avg episode reward: [(0, '23.614')] -[2024-09-01 07:54:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3776512. Throughput: 0: 227.2. Samples: 560526. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:52,157][00307] Avg episode reward: [(0, '23.775')] -[2024-09-01 07:54:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3780608. Throughput: 0: 219.1. Samples: 561816. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:54:57,161][00307] Avg episode reward: [(0, '24.080')] -[2024-09-01 07:55:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3784704. Throughput: 0: 218.3. Samples: 563218. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:55:02,160][00307] Avg episode reward: [(0, '24.563')] -[2024-09-01 07:55:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3788800. Throughput: 0: 221.5. Samples: 563876. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:55:07,159][00307] Avg episode reward: [(0, '24.229')] -[2024-09-01 07:55:08,237][16881] Updated weights for policy 0, policy_version 926 (0.0045) -[2024-09-01 07:55:11,427][16868] Signal inference workers to stop experience collection... (550 times) -[2024-09-01 07:55:11,579][16881] InferenceWorker_p0-w0: stopping experience collection (550 times) -[2024-09-01 07:55:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3792896. Throughput: 0: 232.3. Samples: 565532. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:55:12,156][00307] Avg episode reward: [(0, '24.161')] -[2024-09-01 07:55:13,405][16868] Signal inference workers to resume experience collection... (550 times) -[2024-09-01 07:55:13,406][16881] InferenceWorker_p0-w0: resuming experience collection (550 times) -[2024-09-01 07:55:17,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3796992. Throughput: 0: 219.7. Samples: 566518. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:55:17,164][00307] Avg episode reward: [(0, '24.181')] -[2024-09-01 07:55:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3801088. Throughput: 0: 212.2. Samples: 567002. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:55:22,160][00307] Avg episode reward: [(0, '23.936')] -[2024-09-01 07:55:27,154][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3809280. Throughput: 0: 228.5. Samples: 568758. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:55:27,158][00307] Avg episode reward: [(0, '23.674')] -[2024-09-01 07:55:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3813376. Throughput: 0: 221.2. Samples: 569796. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:55:32,158][00307] Avg episode reward: [(0, '23.705')] -[2024-09-01 07:55:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3817472. Throughput: 0: 222.0. Samples: 570516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:55:37,159][00307] Avg episode reward: [(0, '23.496')] -[2024-09-01 07:55:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3821568. Throughput: 0: 222.8. Samples: 571842. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 07:55:42,163][00307] Avg episode reward: [(0, '22.910')] -[2024-09-01 07:55:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3825664. Throughput: 0: 222.2. Samples: 573216. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:55:47,158][00307] Avg episode reward: [(0, '23.456')] -[2024-09-01 07:55:52,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3829760. Throughput: 0: 221.0. Samples: 573820. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:55:52,161][00307] Avg episode reward: [(0, '23.456')] -[2024-09-01 07:55:55,840][16881] Updated weights for policy 0, policy_version 936 (0.1525) -[2024-09-01 07:55:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3833856. Throughput: 0: 208.3. Samples: 574904. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:55:57,156][00307] Avg episode reward: [(0, '23.924')] -[2024-09-01 07:55:59,135][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000937_3837952.pth... -[2024-09-01 07:55:59,263][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000885_3624960.pth -[2024-09-01 07:56:02,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3837952. Throughput: 0: 231.0. Samples: 576914. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:56:02,165][00307] Avg episode reward: [(0, '23.635')] -[2024-09-01 07:56:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3842048. Throughput: 0: 227.0. Samples: 577216. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:07,157][00307] Avg episode reward: [(0, '24.100')] -[2024-09-01 07:56:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 3846144. Throughput: 0: 213.6. Samples: 578370. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:12,158][00307] Avg episode reward: [(0, '23.857')] -[2024-09-01 07:56:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3850240. Throughput: 0: 226.1. Samples: 579970. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:17,157][00307] Avg episode reward: [(0, '23.926')] -[2024-09-01 07:56:22,156][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3858432. Throughput: 0: 231.1. Samples: 580918. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:22,160][00307] Avg episode reward: [(0, '23.425')] -[2024-09-01 07:56:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3858432. Throughput: 0: 227.6. Samples: 582082. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:27,161][00307] Avg episode reward: [(0, '23.297')] -[2024-09-01 07:56:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3866624. Throughput: 0: 219.2. Samples: 583080. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:32,162][00307] Avg episode reward: [(0, '23.463')] -[2024-09-01 07:56:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3870720. Throughput: 0: 229.6. Samples: 584150. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:37,159][00307] Avg episode reward: [(0, '23.991')] -[2024-09-01 07:56:40,157][16881] Updated weights for policy 0, policy_version 946 (0.1079) -[2024-09-01 07:56:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3874816. Throughput: 0: 233.2. Samples: 585396. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:42,164][00307] Avg episode reward: [(0, '23.827')] -[2024-09-01 07:56:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3878912. Throughput: 0: 211.8. Samples: 586444. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:56:47,164][00307] Avg episode reward: [(0, '23.695')] -[2024-09-01 07:56:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3883008. Throughput: 0: 222.0. Samples: 587204. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:56:52,157][00307] Avg episode reward: [(0, '23.015')] -[2024-09-01 07:56:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3887104. Throughput: 0: 233.5. Samples: 588876. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:56:57,164][00307] Avg episode reward: [(0, '23.186')] -[2024-09-01 07:57:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3891200. Throughput: 0: 224.9. Samples: 590092. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:02,157][00307] Avg episode reward: [(0, '22.959')] -[2024-09-01 07:57:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3895296. Throughput: 0: 211.8. Samples: 590450. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:07,157][00307] Avg episode reward: [(0, '22.556')] -[2024-09-01 07:57:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3899392. Throughput: 0: 224.6. Samples: 592190. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:57:12,167][00307] Avg episode reward: [(0, '22.642')] -[2024-09-01 07:57:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3907584. Throughput: 0: 228.2. Samples: 593350. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:57:17,163][00307] Avg episode reward: [(0, '23.021')] -[2024-09-01 07:57:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3907584. Throughput: 0: 220.1. Samples: 594054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:57:22,156][00307] Avg episode reward: [(0, '24.229')] -[2024-09-01 07:57:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 3915776. Throughput: 0: 220.5. Samples: 595318. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:27,164][00307] Avg episode reward: [(0, '24.504')] -[2024-09-01 07:57:27,323][16881] Updated weights for policy 0, policy_version 956 (0.1121) -[2024-09-01 07:57:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3919872. Throughput: 0: 232.4. Samples: 596904. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:32,158][00307] Avg episode reward: [(0, '25.222')] -[2024-09-01 07:57:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3923968. Throughput: 0: 224.8. Samples: 597322. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:37,159][00307] Avg episode reward: [(0, '24.849')] -[2024-09-01 07:57:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3928064. Throughput: 0: 209.6. Samples: 598308. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:42,161][00307] Avg episode reward: [(0, '25.002')] -[2024-09-01 07:57:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3932160. Throughput: 0: 225.3. Samples: 600230. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:57:47,164][00307] Avg episode reward: [(0, '25.007')] -[2024-09-01 07:57:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3936256. Throughput: 0: 232.0. Samples: 600892. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:57:52,161][00307] Avg episode reward: [(0, '24.433')] -[2024-09-01 07:57:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3940352. Throughput: 0: 217.5. Samples: 601978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:57:57,157][00307] Avg episode reward: [(0, '23.823')] -[2024-09-01 07:57:59,259][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000963_3944448.pth... -[2024-09-01 07:57:59,375][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000911_3731456.pth -[2024-09-01 07:58:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3944448. Throughput: 0: 225.1. Samples: 603480. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:02,165][00307] Avg episode reward: [(0, '23.778')] -[2024-09-01 07:58:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3952640. Throughput: 0: 226.4. Samples: 604244. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:07,156][00307] Avg episode reward: [(0, '24.372')] -[2024-09-01 07:58:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3952640. Throughput: 0: 226.3. Samples: 605500. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:12,157][00307] Avg episode reward: [(0, '24.471')] -[2024-09-01 07:58:13,379][16881] Updated weights for policy 0, policy_version 966 (0.1288) -[2024-09-01 07:58:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 3956736. Throughput: 0: 213.2. Samples: 606496. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:58:17,165][00307] Avg episode reward: [(0, '24.398')] -[2024-09-01 07:58:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 3964928. Throughput: 0: 220.6. Samples: 607250. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:22,163][00307] Avg episode reward: [(0, '24.747')] -[2024-09-01 07:58:27,161][00307] Fps is (10 sec: 1227.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 3969024. Throughput: 0: 232.1. Samples: 608756. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:27,168][00307] Avg episode reward: [(0, '24.494')] -[2024-09-01 07:58:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3973120. Throughput: 0: 212.5. Samples: 609794. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:58:32,157][00307] Avg episode reward: [(0, '24.942')] -[2024-09-01 07:58:37,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3977216. Throughput: 0: 216.2. Samples: 610622. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:58:37,156][00307] Avg episode reward: [(0, '25.176')] -[2024-09-01 07:58:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3981312. Throughput: 0: 223.0. Samples: 612014. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:58:42,164][00307] Avg episode reward: [(0, '25.352')] -[2024-09-01 07:58:47,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3985408. Throughput: 0: 220.0. Samples: 613382. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 07:58:47,163][00307] Avg episode reward: [(0, '25.528')] -[2024-09-01 07:58:52,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3989504. Throughput: 0: 212.8. Samples: 613818. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:52,167][00307] Avg episode reward: [(0, '25.701')] -[2024-09-01 07:58:57,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 3993600. Throughput: 0: 217.5. Samples: 615286. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:58:57,157][00307] Avg episode reward: [(0, '26.064')] -[2024-09-01 07:58:58,797][16881] Updated weights for policy 0, policy_version 976 (0.1049) -[2024-09-01 07:59:01,187][16868] Signal inference workers to stop experience collection... (600 times) -[2024-09-01 07:59:01,239][16881] InferenceWorker_p0-w0: stopping experience collection (600 times) -[2024-09-01 07:59:02,157][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 3997696. Throughput: 0: 231.8. Samples: 616926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:59:02,160][00307] Avg episode reward: [(0, '25.812')] -[2024-09-01 07:59:02,887][16868] Signal inference workers to resume experience collection... (600 times) -[2024-09-01 07:59:02,888][16881] InferenceWorker_p0-w0: resuming experience collection (600 times) -[2024-09-01 07:59:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4001792. Throughput: 0: 226.4. Samples: 617436. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:59:07,162][00307] Avg episode reward: [(0, '25.828')] -[2024-09-01 07:59:12,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4005888. Throughput: 0: 217.0. Samples: 618518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:59:12,157][00307] Avg episode reward: [(0, '26.012')] -[2024-09-01 07:59:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4009984. Throughput: 0: 226.5. Samples: 619986. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 07:59:17,165][00307] Avg episode reward: [(0, '25.592')] -[2024-09-01 07:59:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4018176. Throughput: 0: 230.8. Samples: 621006. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:59:22,160][00307] Avg episode reward: [(0, '25.135')] -[2024-09-01 07:59:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 4018176. Throughput: 0: 223.3. Samples: 622062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:59:27,157][00307] Avg episode reward: [(0, '24.751')] -[2024-09-01 07:59:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4026368. Throughput: 0: 217.2. Samples: 623154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:59:32,167][00307] Avg episode reward: [(0, '24.658')] -[2024-09-01 07:59:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4030464. Throughput: 0: 229.4. Samples: 624140. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:59:37,157][00307] Avg episode reward: [(0, '25.140')] -[2024-09-01 07:59:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4034560. Throughput: 0: 222.8. Samples: 625312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 07:59:42,163][00307] Avg episode reward: [(0, '24.925')] -[2024-09-01 07:59:46,187][16881] Updated weights for policy 0, policy_version 986 (0.1050) -[2024-09-01 07:59:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4038656. Throughput: 0: 212.5. Samples: 626488. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:59:47,156][00307] Avg episode reward: [(0, '25.362')] -[2024-09-01 07:59:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4042752. Throughput: 0: 216.0. Samples: 627156. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:59:52,167][00307] Avg episode reward: [(0, '25.625')] -[2024-09-01 07:59:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4046848. Throughput: 0: 234.5. Samples: 629070. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 07:59:57,156][00307] Avg episode reward: [(0, '26.444')] -[2024-09-01 08:00:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4050944. Throughput: 0: 221.6. Samples: 629960. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:00:02,160][00307] Avg episode reward: [(0, '26.873')] -[2024-09-01 08:00:04,446][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000990_4055040.pth... -[2024-09-01 08:00:04,563][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000937_3837952.pth -[2024-09-01 08:00:04,582][16868] Saving new best policy, reward=26.873! -[2024-09-01 08:00:07,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4055040. Throughput: 0: 210.3. Samples: 630472. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:00:07,167][00307] Avg episode reward: [(0, '26.993')] -[2024-09-01 08:00:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4059136. Throughput: 0: 225.6. Samples: 632212. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:00:12,166][00307] Avg episode reward: [(0, '26.909')] -[2024-09-01 08:00:12,409][16868] Saving new best policy, reward=26.993! -[2024-09-01 08:00:17,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4063232. Throughput: 0: 224.2. Samples: 633242. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:00:17,162][00307] Avg episode reward: [(0, '26.253')] -[2024-09-01 08:00:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 4067328. Throughput: 0: 213.3. Samples: 633738. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:00:22,158][00307] Avg episode reward: [(0, '25.801')] -[2024-09-01 08:00:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4075520. Throughput: 0: 223.8. Samples: 635382. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:00:27,156][00307] Avg episode reward: [(0, '25.918')] -[2024-09-01 08:00:30,793][16881] Updated weights for policy 0, policy_version 996 (0.0080) -[2024-09-01 08:00:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4079616. Throughput: 0: 228.1. Samples: 636752. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:00:32,161][00307] Avg episode reward: [(0, '25.471')] -[2024-09-01 08:00:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4083712. Throughput: 0: 227.8. Samples: 637408. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:00:37,160][00307] Avg episode reward: [(0, '25.445')] -[2024-09-01 08:00:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4087808. Throughput: 0: 206.8. Samples: 638376. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:00:42,157][00307] Avg episode reward: [(0, '25.472')] -[2024-09-01 08:00:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4091904. Throughput: 0: 226.1. Samples: 640134. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:00:47,157][00307] Avg episode reward: [(0, '24.953')] -[2024-09-01 08:00:52,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4096000. Throughput: 0: 223.3. Samples: 640520. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:00:52,163][00307] Avg episode reward: [(0, '25.054')] -[2024-09-01 08:00:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4100096. Throughput: 0: 210.6. Samples: 641690. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:00:57,160][00307] Avg episode reward: [(0, '25.144')] -[2024-09-01 08:01:02,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4104192. Throughput: 0: 227.1. Samples: 643460. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:01:02,167][00307] Avg episode reward: [(0, '24.713')] -[2024-09-01 08:01:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4108288. Throughput: 0: 230.4. Samples: 644104. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:01:07,164][00307] Avg episode reward: [(0, '25.032')] -[2024-09-01 08:01:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4112384. Throughput: 0: 222.4. Samples: 645390. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:01:12,157][00307] Avg episode reward: [(0, '25.132')] -[2024-09-01 08:01:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4116480. Throughput: 0: 217.0. Samples: 646516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:01:17,162][00307] Avg episode reward: [(0, '24.805')] -[2024-09-01 08:01:18,428][16881] Updated weights for policy 0, policy_version 1006 (0.1074) -[2024-09-01 08:01:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4124672. Throughput: 0: 217.4. Samples: 647192. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:01:22,157][00307] Avg episode reward: [(0, '24.096')] -[2024-09-01 08:01:27,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4128768. Throughput: 0: 231.5. Samples: 648792. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:01:27,161][00307] Avg episode reward: [(0, '23.914')] -[2024-09-01 08:01:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4132864. Throughput: 0: 215.5. Samples: 649832. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:01:32,158][00307] Avg episode reward: [(0, '23.521')] -[2024-09-01 08:01:37,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4136960. Throughput: 0: 226.8. Samples: 650724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:01:37,156][00307] Avg episode reward: [(0, '23.283')] -[2024-09-01 08:01:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4141056. Throughput: 0: 232.5. Samples: 652152. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:01:42,157][00307] Avg episode reward: [(0, '23.227')] -[2024-09-01 08:01:47,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4145152. Throughput: 0: 222.1. Samples: 653456. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:01:47,162][00307] Avg episode reward: [(0, '22.980')] -[2024-09-01 08:01:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4149248. Throughput: 0: 215.6. Samples: 653804. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:01:52,159][00307] Avg episode reward: [(0, '22.643')] -[2024-09-01 08:01:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4153344. Throughput: 0: 225.2. Samples: 655524. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:01:57,157][00307] Avg episode reward: [(0, '21.812')] -[2024-09-01 08:02:02,035][16881] Updated weights for policy 0, policy_version 1016 (0.1166) -[2024-09-01 08:02:02,061][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001016_4161536.pth... -[2024-09-01 08:02:02,158][00307] Fps is (10 sec: 1228.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4161536. Throughput: 0: 227.2. Samples: 656740. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:02,161][00307] Avg episode reward: [(0, '21.970')] -[2024-09-01 08:02:02,268][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000963_3944448.pth -[2024-09-01 08:02:07,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4161536. Throughput: 0: 228.5. Samples: 657474. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:07,160][00307] Avg episode reward: [(0, '22.274')] -[2024-09-01 08:02:12,154][00307] Fps is (10 sec: 409.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4165632. Throughput: 0: 223.1. Samples: 658832. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:12,158][00307] Avg episode reward: [(0, '22.280')] -[2024-09-01 08:02:17,154][00307] Fps is (10 sec: 1229.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4173824. Throughput: 0: 229.0. Samples: 660138. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:02:17,159][00307] Avg episode reward: [(0, '23.124')] -[2024-09-01 08:02:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4177920. Throughput: 0: 226.3. Samples: 660906. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:22,158][00307] Avg episode reward: [(0, '22.620')] -[2024-09-01 08:02:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4182016. Throughput: 0: 217.7. Samples: 661950. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:27,156][00307] Avg episode reward: [(0, '22.712')] -[2024-09-01 08:02:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4186112. Throughput: 0: 220.9. Samples: 663396. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:32,158][00307] Avg episode reward: [(0, '22.538')] -[2024-09-01 08:02:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4190208. Throughput: 0: 232.1. Samples: 664248. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:37,158][00307] Avg episode reward: [(0, '22.470')] -[2024-09-01 08:02:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4194304. Throughput: 0: 222.0. Samples: 665514. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:42,160][00307] Avg episode reward: [(0, '23.050')] -[2024-09-01 08:02:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4198400. Throughput: 0: 226.1. Samples: 666914. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:47,158][00307] Avg episode reward: [(0, '23.116')] -[2024-09-01 08:02:49,181][16881] Updated weights for policy 0, policy_version 1026 (0.1631) -[2024-09-01 08:02:51,497][16868] Signal inference workers to stop experience collection... (650 times) -[2024-09-01 08:02:51,535][16881] InferenceWorker_p0-w0: stopping experience collection (650 times) -[2024-09-01 08:02:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4202496. Throughput: 0: 220.4. Samples: 667390. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:52,157][00307] Avg episode reward: [(0, '23.219')] -[2024-09-01 08:02:52,549][16868] Signal inference workers to resume experience collection... (650 times) -[2024-09-01 08:02:52,551][16881] InferenceWorker_p0-w0: resuming experience collection (650 times) -[2024-09-01 08:02:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4206592. Throughput: 0: 227.9. Samples: 669086. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:02:57,163][00307] Avg episode reward: [(0, '23.180')] -[2024-09-01 08:03:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 4210688. Throughput: 0: 221.0. Samples: 670084. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:02,156][00307] Avg episode reward: [(0, '23.410')] -[2024-09-01 08:03:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 902.5). Total num frames: 4218880. Throughput: 0: 223.1. Samples: 670946. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:07,158][00307] Avg episode reward: [(0, '23.173')] -[2024-09-01 08:03:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4222976. Throughput: 0: 226.8. Samples: 672158. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:12,157][00307] Avg episode reward: [(0, '23.373')] -[2024-09-01 08:03:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4227072. Throughput: 0: 218.8. Samples: 673242. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:17,165][00307] Avg episode reward: [(0, '24.215')] -[2024-09-01 08:03:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4231168. Throughput: 0: 215.4. Samples: 673940. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:22,158][00307] Avg episode reward: [(0, '24.904')] -[2024-09-01 08:03:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4235264. Throughput: 0: 222.3. Samples: 675518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:27,156][00307] Avg episode reward: [(0, '25.303')] -[2024-09-01 08:03:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4239360. Throughput: 0: 229.3. Samples: 677234. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:32,163][00307] Avg episode reward: [(0, '25.586')] -[2024-09-01 08:03:34,099][16881] Updated weights for policy 0, policy_version 1036 (0.1755) -[2024-09-01 08:03:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4243456. Throughput: 0: 224.6. Samples: 677496. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:37,161][00307] Avg episode reward: [(0, '25.296')] -[2024-09-01 08:03:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4247552. Throughput: 0: 217.8. Samples: 678888. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:42,164][00307] Avg episode reward: [(0, '24.968')] -[2024-09-01 08:03:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4251648. Throughput: 0: 228.9. Samples: 680386. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:47,157][00307] Avg episode reward: [(0, '24.428')] -[2024-09-01 08:03:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4255744. Throughput: 0: 229.8. Samples: 681286. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:52,156][00307] Avg episode reward: [(0, '24.375')] -[2024-09-01 08:03:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4259840. Throughput: 0: 220.3. Samples: 682070. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:03:57,157][00307] Avg episode reward: [(0, '24.851')] -[2024-09-01 08:04:01,337][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001042_4268032.pth... -[2024-09-01 08:04:01,435][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000990_4055040.pth -[2024-09-01 08:04:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4268032. Throughput: 0: 231.2. Samples: 683646. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:02,156][00307] Avg episode reward: [(0, '24.479')] -[2024-09-01 08:04:07,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4272128. Throughput: 0: 232.2. Samples: 684388. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:07,169][00307] Avg episode reward: [(0, '25.010')] -[2024-09-01 08:04:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4276224. Throughput: 0: 221.2. Samples: 685474. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:12,156][00307] Avg episode reward: [(0, '25.010')] -[2024-09-01 08:04:17,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4280320. Throughput: 0: 212.3. Samples: 686788. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:17,165][00307] Avg episode reward: [(0, '26.010')] -[2024-09-01 08:04:20,177][16881] Updated weights for policy 0, policy_version 1046 (0.1722) -[2024-09-01 08:04:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4284416. Throughput: 0: 223.0. Samples: 687532. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:22,156][00307] Avg episode reward: [(0, '26.635')] -[2024-09-01 08:04:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4288512. Throughput: 0: 226.8. Samples: 689096. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:27,156][00307] Avg episode reward: [(0, '26.350')] -[2024-09-01 08:04:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4292608. Throughput: 0: 216.2. Samples: 690114. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:32,157][00307] Avg episode reward: [(0, '26.303')] -[2024-09-01 08:04:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4296704. Throughput: 0: 208.4. Samples: 690662. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:37,157][00307] Avg episode reward: [(0, '26.480')] -[2024-09-01 08:04:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4300800. Throughput: 0: 235.0. Samples: 692646. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:42,157][00307] Avg episode reward: [(0, '26.825')] -[2024-09-01 08:04:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4304896. Throughput: 0: 220.5. Samples: 693568. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:47,158][00307] Avg episode reward: [(0, '26.798')] -[2024-09-01 08:04:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4308992. Throughput: 0: 214.9. Samples: 694060. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:04:52,165][00307] Avg episode reward: [(0, '26.240')] -[2024-09-01 08:04:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4317184. Throughput: 0: 229.7. Samples: 695810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:04:57,164][00307] Avg episode reward: [(0, '26.098')] -[2024-09-01 08:05:02,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4321280. Throughput: 0: 228.5. Samples: 697072. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:02,159][00307] Avg episode reward: [(0, '26.118')] -[2024-09-01 08:05:07,061][16881] Updated weights for policy 0, policy_version 1056 (0.1732) -[2024-09-01 08:05:07,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4325376. Throughput: 0: 228.1. Samples: 697796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:07,162][00307] Avg episode reward: [(0, '26.048')] -[2024-09-01 08:05:12,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4329472. Throughput: 0: 217.2. Samples: 698868. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:12,163][00307] Avg episode reward: [(0, '26.579')] -[2024-09-01 08:05:17,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4333568. Throughput: 0: 234.9. Samples: 700686. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:17,164][00307] Avg episode reward: [(0, '26.899')] -[2024-09-01 08:05:22,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4337664. Throughput: 0: 230.9. Samples: 701054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:22,161][00307] Avg episode reward: [(0, '26.413')] -[2024-09-01 08:05:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4341760. Throughput: 0: 210.6. Samples: 702122. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:27,159][00307] Avg episode reward: [(0, '26.125')] -[2024-09-01 08:05:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4345856. Throughput: 0: 230.1. Samples: 703922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:32,164][00307] Avg episode reward: [(0, '26.056')] -[2024-09-01 08:05:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4349952. Throughput: 0: 232.8. Samples: 704534. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:37,161][00307] Avg episode reward: [(0, '25.909')] -[2024-09-01 08:05:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4354048. Throughput: 0: 216.6. Samples: 705558. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:42,163][00307] Avg episode reward: [(0, '25.904')] -[2024-09-01 08:05:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4358144. Throughput: 0: 222.6. Samples: 707088. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:47,162][00307] Avg episode reward: [(0, '26.018')] -[2024-09-01 08:05:51,461][16881] Updated weights for policy 0, policy_version 1066 (0.2246) -[2024-09-01 08:05:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4366336. Throughput: 0: 227.8. Samples: 708048. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:52,162][00307] Avg episode reward: [(0, '26.674')] -[2024-09-01 08:05:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4370432. Throughput: 0: 226.0. Samples: 709040. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:05:57,160][00307] Avg episode reward: [(0, '26.785')] -[2024-09-01 08:06:01,767][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001068_4374528.pth... -[2024-09-01 08:06:01,885][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001016_4161536.pth -[2024-09-01 08:06:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4374528. Throughput: 0: 210.4. Samples: 710152. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:06:02,157][00307] Avg episode reward: [(0, '27.121')] -[2024-09-01 08:06:05,782][16868] Saving new best policy, reward=27.121! -[2024-09-01 08:06:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4378624. Throughput: 0: 223.3. Samples: 711100. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:06:07,158][00307] Avg episode reward: [(0, '27.097')] -[2024-09-01 08:06:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4382720. Throughput: 0: 234.7. Samples: 712684. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:12,157][00307] Avg episode reward: [(0, '26.603')] -[2024-09-01 08:06:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4386816. Throughput: 0: 216.1. Samples: 713648. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:17,157][00307] Avg episode reward: [(0, '25.979')] -[2024-09-01 08:06:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4390912. Throughput: 0: 214.4. Samples: 714182. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:22,164][00307] Avg episode reward: [(0, '25.186')] -[2024-09-01 08:06:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4395008. Throughput: 0: 230.6. Samples: 715934. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:27,165][00307] Avg episode reward: [(0, '25.660')] -[2024-09-01 08:06:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4399104. Throughput: 0: 226.2. Samples: 717266. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:32,162][00307] Avg episode reward: [(0, '25.443')] -[2024-09-01 08:06:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4403200. Throughput: 0: 214.2. Samples: 717686. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:37,165][00307] Avg episode reward: [(0, '25.460')] -[2024-09-01 08:06:38,746][16881] Updated weights for policy 0, policy_version 1076 (0.0536) -[2024-09-01 08:06:41,206][16868] Signal inference workers to stop experience collection... (700 times) -[2024-09-01 08:06:41,256][16881] InferenceWorker_p0-w0: stopping experience collection (700 times) -[2024-09-01 08:06:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4407296. Throughput: 0: 227.6. Samples: 719284. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:06:42,157][00307] Avg episode reward: [(0, '26.005')] -[2024-09-01 08:06:42,633][16868] Signal inference workers to resume experience collection... (700 times) -[2024-09-01 08:06:42,634][16881] InferenceWorker_p0-w0: resuming experience collection (700 times) -[2024-09-01 08:06:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4415488. Throughput: 0: 218.1. Samples: 719968. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:06:47,157][00307] Avg episode reward: [(0, '26.415')] -[2024-09-01 08:06:52,158][00307] Fps is (10 sec: 1228.3, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4419584. Throughput: 0: 223.9. Samples: 721176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:06:52,163][00307] Avg episode reward: [(0, '25.815')] -[2024-09-01 08:06:57,168][00307] Fps is (10 sec: 818.1, 60 sec: 887.3, 300 sec: 888.6). Total num frames: 4423680. Throughput: 0: 216.9. Samples: 722446. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:06:57,172][00307] Avg episode reward: [(0, '26.074')] -[2024-09-01 08:07:02,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4427776. Throughput: 0: 228.7. Samples: 723940. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:07:02,157][00307] Avg episode reward: [(0, '26.277')] -[2024-09-01 08:07:07,154][00307] Fps is (10 sec: 820.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4431872. Throughput: 0: 231.4. Samples: 724596. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:07:07,159][00307] Avg episode reward: [(0, '26.962')] -[2024-09-01 08:07:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4435968. Throughput: 0: 215.8. Samples: 725644. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:07:12,162][00307] Avg episode reward: [(0, '26.895')] -[2024-09-01 08:07:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4440064. Throughput: 0: 220.0. Samples: 727168. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:07:17,158][00307] Avg episode reward: [(0, '27.014')] -[2024-09-01 08:07:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4444160. Throughput: 0: 225.9. Samples: 727850. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:07:22,163][00307] Avg episode reward: [(0, '27.221')] -[2024-09-01 08:07:22,909][16881] Updated weights for policy 0, policy_version 1086 (0.0549) -[2024-09-01 08:07:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4448256. Throughput: 0: 222.5. Samples: 729298. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:27,157][00307] Avg episode reward: [(0, '26.774')] -[2024-09-01 08:07:28,661][16868] Saving new best policy, reward=27.221! -[2024-09-01 08:07:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4452352. Throughput: 0: 233.9. Samples: 730492. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:32,168][00307] Avg episode reward: [(0, '27.351')] -[2024-09-01 08:07:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4456448. Throughput: 0: 222.5. Samples: 731188. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:37,161][00307] Avg episode reward: [(0, '27.460')] -[2024-09-01 08:07:37,167][16868] Saving new best policy, reward=27.351! -[2024-09-01 08:07:41,078][16868] Saving new best policy, reward=27.460! -[2024-09-01 08:07:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4464640. Throughput: 0: 224.1. Samples: 732528. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:42,165][00307] Avg episode reward: [(0, '28.025')] -[2024-09-01 08:07:47,083][16868] Saving new best policy, reward=28.025! -[2024-09-01 08:07:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4468736. Throughput: 0: 213.8. Samples: 733560. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:47,157][00307] Avg episode reward: [(0, '27.667')] -[2024-09-01 08:07:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4472832. Throughput: 0: 216.9. Samples: 734356. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:52,156][00307] Avg episode reward: [(0, '27.620')] -[2024-09-01 08:07:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.7, 300 sec: 902.5). Total num frames: 4476928. Throughput: 0: 225.7. Samples: 735800. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:07:57,157][00307] Avg episode reward: [(0, '27.798')] -[2024-09-01 08:07:59,436][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001094_4481024.pth... -[2024-09-01 08:07:59,568][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001042_4268032.pth -[2024-09-01 08:08:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4481024. Throughput: 0: 224.0. Samples: 737248. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:02,160][00307] Avg episode reward: [(0, '28.028')] -[2024-09-01 08:08:05,639][16868] Saving new best policy, reward=28.028! -[2024-09-01 08:08:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4485120. Throughput: 0: 218.8. Samples: 737698. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:07,161][00307] Avg episode reward: [(0, '28.451')] -[2024-09-01 08:08:10,180][16868] Saving new best policy, reward=28.451! -[2024-09-01 08:08:10,194][16881] Updated weights for policy 0, policy_version 1096 (0.0079) -[2024-09-01 08:08:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4489216. Throughput: 0: 216.8. Samples: 739052. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:12,159][00307] Avg episode reward: [(0, '28.666')] -[2024-09-01 08:08:14,108][16868] Saving new best policy, reward=28.666! -[2024-09-01 08:08:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4493312. Throughput: 0: 227.7. Samples: 740740. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:17,166][00307] Avg episode reward: [(0, '29.024')] -[2024-09-01 08:08:22,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4497408. Throughput: 0: 221.0. Samples: 741132. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:08:22,161][00307] Avg episode reward: [(0, '28.491')] -[2024-09-01 08:08:24,331][16868] Saving new best policy, reward=29.024! -[2024-09-01 08:08:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4501504. Throughput: 0: 217.2. Samples: 742304. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:08:27,164][00307] Avg episode reward: [(0, '28.866')] -[2024-09-01 08:08:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4505600. Throughput: 0: 228.0. Samples: 743822. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:08:32,166][00307] Avg episode reward: [(0, '29.267')] -[2024-09-01 08:08:36,617][16868] Saving new best policy, reward=29.267! -[2024-09-01 08:08:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4513792. Throughput: 0: 233.1. Samples: 744844. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:37,157][00307] Avg episode reward: [(0, '29.680')] -[2024-09-01 08:08:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4513792. Throughput: 0: 222.0. Samples: 745792. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:42,162][00307] Avg episode reward: [(0, '29.629')] -[2024-09-01 08:08:42,680][16868] Saving new best policy, reward=29.680! -[2024-09-01 08:08:47,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4521984. Throughput: 0: 216.3. Samples: 746982. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:47,161][00307] Avg episode reward: [(0, '29.211')] -[2024-09-01 08:08:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4526080. Throughput: 0: 226.9. Samples: 747910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:08:52,156][00307] Avg episode reward: [(0, '29.211')] -[2024-09-01 08:08:55,470][16881] Updated weights for policy 0, policy_version 1106 (0.0551) -[2024-09-01 08:08:57,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4530176. Throughput: 0: 222.7. Samples: 749072. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:08:57,161][00307] Avg episode reward: [(0, '29.812')] -[2024-09-01 08:09:01,166][16868] Saving new best policy, reward=29.812! -[2024-09-01 08:09:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4534272. Throughput: 0: 211.7. Samples: 750266. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:09:02,162][00307] Avg episode reward: [(0, '29.445')] -[2024-09-01 08:09:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4538368. Throughput: 0: 217.6. Samples: 750924. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:09:07,158][00307] Avg episode reward: [(0, '28.284')] -[2024-09-01 08:09:12,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4542464. Throughput: 0: 231.8. Samples: 752734. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:09:12,162][00307] Avg episode reward: [(0, '29.265')] -[2024-09-01 08:09:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4546560. Throughput: 0: 218.4. Samples: 753652. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:09:17,162][00307] Avg episode reward: [(0, '30.316')] -[2024-09-01 08:09:19,721][16868] Saving new best policy, reward=30.316! -[2024-09-01 08:09:22,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4550656. Throughput: 0: 206.7. Samples: 754144. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:09:22,157][00307] Avg episode reward: [(0, '30.261')] -[2024-09-01 08:09:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4554752. Throughput: 0: 226.9. Samples: 756004. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:09:27,166][00307] Avg episode reward: [(0, '30.113')] -[2024-09-01 08:09:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4558848. Throughput: 0: 224.3. Samples: 757074. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:09:32,160][00307] Avg episode reward: [(0, '29.774')] -[2024-09-01 08:09:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4562944. Throughput: 0: 213.8. Samples: 757530. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:09:37,162][00307] Avg episode reward: [(0, '29.181')] -[2024-09-01 08:09:41,861][16881] Updated weights for policy 0, policy_version 1116 (0.0040) -[2024-09-01 08:09:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4571136. Throughput: 0: 223.2. Samples: 759116. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) -[2024-09-01 08:09:42,158][00307] Avg episode reward: [(0, '29.654')] -[2024-09-01 08:09:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4575232. Throughput: 0: 228.5. Samples: 760548. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) -[2024-09-01 08:09:47,166][00307] Avg episode reward: [(0, '28.364')] -[2024-09-01 08:09:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4579328. Throughput: 0: 227.2. Samples: 761146. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:09:52,157][00307] Avg episode reward: [(0, '27.827')] -[2024-09-01 08:09:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4583424. Throughput: 0: 210.3. Samples: 762196. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:09:57,157][00307] Avg episode reward: [(0, '27.787')] -[2024-09-01 08:10:00,227][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001120_4587520.pth... -[2024-09-01 08:10:00,335][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001068_4374528.pth -[2024-09-01 08:10:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4587520. Throughput: 0: 229.1. Samples: 763962. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:02,158][00307] Avg episode reward: [(0, '27.170')] -[2024-09-01 08:10:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4591616. Throughput: 0: 229.1. Samples: 764454. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:07,165][00307] Avg episode reward: [(0, '27.180')] -[2024-09-01 08:10:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4595712. Throughput: 0: 210.8. Samples: 765488. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:12,162][00307] Avg episode reward: [(0, '27.085')] -[2024-09-01 08:10:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4599808. Throughput: 0: 225.5. Samples: 767222. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:17,157][00307] Avg episode reward: [(0, '27.114')] -[2024-09-01 08:10:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4603904. Throughput: 0: 228.1. Samples: 767796. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:22,163][00307] Avg episode reward: [(0, '27.014')] -[2024-09-01 08:10:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4608000. Throughput: 0: 221.3. Samples: 769076. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:27,157][00307] Avg episode reward: [(0, '27.325')] -[2024-09-01 08:10:28,562][16881] Updated weights for policy 0, policy_version 1126 (0.0553) -[2024-09-01 08:10:31,554][16868] Signal inference workers to stop experience collection... (750 times) -[2024-09-01 08:10:31,601][16881] InferenceWorker_p0-w0: stopping experience collection (750 times) -[2024-09-01 08:10:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4612096. Throughput: 0: 218.8. Samples: 770396. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:32,162][00307] Avg episode reward: [(0, '27.257')] -[2024-09-01 08:10:32,956][16868] Signal inference workers to resume experience collection... (750 times) -[2024-09-01 08:10:32,960][16881] InferenceWorker_p0-w0: resuming experience collection (750 times) -[2024-09-01 08:10:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4620288. Throughput: 0: 220.4. Samples: 771062. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:37,158][00307] Avg episode reward: [(0, '27.325')] -[2024-09-01 08:10:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4624384. Throughput: 0: 228.2. Samples: 772464. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:10:42,159][00307] Avg episode reward: [(0, '27.664')] -[2024-09-01 08:10:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4628480. Throughput: 0: 212.5. Samples: 773526. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:10:47,158][00307] Avg episode reward: [(0, '28.153')] -[2024-09-01 08:10:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4632576. Throughput: 0: 223.5. Samples: 774512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:10:52,165][00307] Avg episode reward: [(0, '28.534')] -[2024-09-01 08:10:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4636672. Throughput: 0: 232.3. Samples: 775940. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:10:57,157][00307] Avg episode reward: [(0, '28.612')] -[2024-09-01 08:11:02,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4640768. Throughput: 0: 221.4. Samples: 777184. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:11:02,161][00307] Avg episode reward: [(0, '28.272')] -[2024-09-01 08:11:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4644864. Throughput: 0: 216.8. Samples: 777554. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:11:07,164][00307] Avg episode reward: [(0, '28.337')] -[2024-09-01 08:11:12,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4648960. Throughput: 0: 229.5. Samples: 779402. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:11:12,158][00307] Avg episode reward: [(0, '28.788')] -[2024-09-01 08:11:12,965][16881] Updated weights for policy 0, policy_version 1136 (0.0545) -[2024-09-01 08:11:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4653056. Throughput: 0: 228.9. Samples: 780696. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:11:17,157][00307] Avg episode reward: [(0, '28.374')] -[2024-09-01 08:11:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4657152. Throughput: 0: 227.3. Samples: 781290. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:11:22,159][00307] Avg episode reward: [(0, '27.607')] -[2024-09-01 08:11:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4661248. Throughput: 0: 228.3. Samples: 782738. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:11:27,163][00307] Avg episode reward: [(0, '27.531')] -[2024-09-01 08:11:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4669440. Throughput: 0: 236.6. Samples: 784174. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:11:32,156][00307] Avg episode reward: [(0, '27.137')] -[2024-09-01 08:11:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4673536. Throughput: 0: 228.3. Samples: 784784. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:11:37,159][00307] Avg episode reward: [(0, '26.997')] -[2024-09-01 08:11:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4677632. Throughput: 0: 219.7. Samples: 785828. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:11:42,156][00307] Avg episode reward: [(0, '27.029')] -[2024-09-01 08:11:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4681728. Throughput: 0: 230.9. Samples: 787574. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:11:47,157][00307] Avg episode reward: [(0, '26.515')] -[2024-09-01 08:11:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.7). Total num frames: 4685824. Throughput: 0: 238.2. Samples: 788272. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:11:52,158][00307] Avg episode reward: [(0, '27.177')] -[2024-09-01 08:11:57,163][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 888.6). Total num frames: 4689920. Throughput: 0: 220.9. Samples: 789344. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:11:57,165][00307] Avg episode reward: [(0, '27.787')] -[2024-09-01 08:11:59,503][16881] Updated weights for policy 0, policy_version 1146 (0.0755) -[2024-09-01 08:11:59,507][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001146_4694016.pth... -[2024-09-01 08:11:59,626][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001094_4481024.pth -[2024-09-01 08:12:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4694016. Throughput: 0: 226.5. Samples: 790890. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:12:02,162][00307] Avg episode reward: [(0, '27.308')] -[2024-09-01 08:12:07,154][00307] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4698112. Throughput: 0: 228.6. Samples: 791578. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:12:07,167][00307] Avg episode reward: [(0, '27.512')] -[2024-09-01 08:12:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4702208. Throughput: 0: 225.6. Samples: 792892. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:12:12,157][00307] Avg episode reward: [(0, '27.611')] -[2024-09-01 08:12:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4706304. Throughput: 0: 220.3. Samples: 794088. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:12:17,160][00307] Avg episode reward: [(0, '27.808')] -[2024-09-01 08:12:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4714496. Throughput: 0: 221.2. Samples: 794738. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:12:22,156][00307] Avg episode reward: [(0, '27.066')] -[2024-09-01 08:12:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4718592. Throughput: 0: 231.5. Samples: 796246. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:12:27,164][00307] Avg episode reward: [(0, '26.068')] -[2024-09-01 08:12:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4722688. Throughput: 0: 216.5. Samples: 797316. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:12:32,161][00307] Avg episode reward: [(0, '26.211')] -[2024-09-01 08:12:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4726784. Throughput: 0: 220.8. Samples: 798208. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:12:37,163][00307] Avg episode reward: [(0, '26.234')] -[2024-09-01 08:12:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4730880. Throughput: 0: 223.9. Samples: 799416. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:12:42,156][00307] Avg episode reward: [(0, '25.698')] -[2024-09-01 08:12:44,172][16881] Updated weights for policy 0, policy_version 1156 (0.1524) -[2024-09-01 08:12:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4734976. Throughput: 0: 224.6. Samples: 800996. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:12:47,160][00307] Avg episode reward: [(0, '26.022')] -[2024-09-01 08:12:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4739072. Throughput: 0: 215.6. Samples: 801278. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:12:52,166][00307] Avg episode reward: [(0, '25.786')] -[2024-09-01 08:12:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 4743168. Throughput: 0: 223.2. Samples: 802934. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:12:57,167][00307] Avg episode reward: [(0, '25.932')] -[2024-09-01 08:13:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4747264. Throughput: 0: 228.1. Samples: 804352. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:13:02,159][00307] Avg episode reward: [(0, '25.738')] -[2024-09-01 08:13:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4751360. Throughput: 0: 228.2. Samples: 805008. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:13:07,158][00307] Avg episode reward: [(0, '26.140')] -[2024-09-01 08:13:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4755456. Throughput: 0: 221.6. Samples: 806216. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:13:12,165][00307] Avg episode reward: [(0, '26.069')] -[2024-09-01 08:13:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4763648. Throughput: 0: 228.4. Samples: 807596. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:13:17,157][00307] Avg episode reward: [(0, '25.498')] -[2024-09-01 08:13:22,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4767744. Throughput: 0: 225.9. Samples: 808374. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:22,165][00307] Avg episode reward: [(0, '25.750')] -[2024-09-01 08:13:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4771840. Throughput: 0: 222.0. Samples: 809406. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:27,158][00307] Avg episode reward: [(0, '26.316')] -[2024-09-01 08:13:30,716][16881] Updated weights for policy 0, policy_version 1166 (0.0043) -[2024-09-01 08:13:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4775936. Throughput: 0: 221.2. Samples: 810948. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:32,157][00307] Avg episode reward: [(0, '27.083')] -[2024-09-01 08:13:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4780032. Throughput: 0: 226.0. Samples: 811446. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:37,156][00307] Avg episode reward: [(0, '26.385')] -[2024-09-01 08:13:42,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4784128. Throughput: 0: 222.0. Samples: 812922. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:42,161][00307] Avg episode reward: [(0, '26.721')] -[2024-09-01 08:13:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4788224. Throughput: 0: 220.2. Samples: 814262. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:47,157][00307] Avg episode reward: [(0, '26.681')] -[2024-09-01 08:13:52,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4792320. Throughput: 0: 222.5. Samples: 815020. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:13:52,161][00307] Avg episode reward: [(0, '27.094')] -[2024-09-01 08:13:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4800512. Throughput: 0: 230.5. Samples: 816590. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:13:57,160][00307] Avg episode reward: [(0, '27.084')] -[2024-09-01 08:14:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4800512. Throughput: 0: 222.0. Samples: 817584. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:14:02,157][00307] Avg episode reward: [(0, '27.788')] -[2024-09-01 08:14:02,674][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001173_4804608.pth... -[2024-09-01 08:14:02,794][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001120_4587520.pth -[2024-09-01 08:14:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4804608. Throughput: 0: 219.8. Samples: 818264. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:14:07,165][00307] Avg episode reward: [(0, '27.752')] -[2024-09-01 08:14:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4812800. Throughput: 0: 227.7. Samples: 819652. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:14:12,167][00307] Avg episode reward: [(0, '28.752')] -[2024-09-01 08:14:14,824][16881] Updated weights for policy 0, policy_version 1176 (0.1193) -[2024-09-01 08:14:17,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4816896. Throughput: 0: 226.3. Samples: 821132. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:17,163][00307] Avg episode reward: [(0, '28.931')] -[2024-09-01 08:14:19,099][16868] Signal inference workers to stop experience collection... (800 times) -[2024-09-01 08:14:19,180][16881] InferenceWorker_p0-w0: stopping experience collection (800 times) -[2024-09-01 08:14:20,821][16868] Signal inference workers to resume experience collection... (800 times) -[2024-09-01 08:14:20,822][16881] InferenceWorker_p0-w0: resuming experience collection (800 times) -[2024-09-01 08:14:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4820992. Throughput: 0: 226.1. Samples: 821622. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:22,158][00307] Avg episode reward: [(0, '29.256')] -[2024-09-01 08:14:27,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4825088. Throughput: 0: 224.5. Samples: 823026. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:27,159][00307] Avg episode reward: [(0, '29.133')] -[2024-09-01 08:14:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4829184. Throughput: 0: 231.7. Samples: 824688. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:32,157][00307] Avg episode reward: [(0, '29.227')] -[2024-09-01 08:14:37,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4833280. Throughput: 0: 224.9. Samples: 825142. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:37,161][00307] Avg episode reward: [(0, '29.084')] -[2024-09-01 08:14:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4837376. Throughput: 0: 217.2. Samples: 826364. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:42,156][00307] Avg episode reward: [(0, '28.481')] -[2024-09-01 08:14:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4841472. Throughput: 0: 227.2. Samples: 827810. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:47,165][00307] Avg episode reward: [(0, '28.396')] -[2024-09-01 08:14:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4849664. Throughput: 0: 232.5. Samples: 828728. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:52,163][00307] Avg episode reward: [(0, '28.217')] -[2024-09-01 08:14:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4849664. Throughput: 0: 221.2. Samples: 829606. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:14:57,162][00307] Avg episode reward: [(0, '27.823')] -[2024-09-01 08:15:02,106][16881] Updated weights for policy 0, policy_version 1186 (0.2439) -[2024-09-01 08:15:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4857856. Throughput: 0: 204.0. Samples: 830312. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:15:02,166][00307] Avg episode reward: [(0, '27.558')] -[2024-09-01 08:15:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4861952. Throughput: 0: 226.6. Samples: 831820. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:15:07,160][00307] Avg episode reward: [(0, '27.706')] -[2024-09-01 08:15:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4866048. Throughput: 0: 222.4. Samples: 833032. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:15:12,162][00307] Avg episode reward: [(0, '27.299')] -[2024-09-01 08:15:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4870144. Throughput: 0: 214.0. Samples: 834316. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:15:17,159][00307] Avg episode reward: [(0, '27.372')] -[2024-09-01 08:15:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4874240. Throughput: 0: 218.3. Samples: 834964. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:15:22,157][00307] Avg episode reward: [(0, '27.681')] -[2024-09-01 08:15:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4878336. Throughput: 0: 231.3. Samples: 836774. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:15:27,159][00307] Avg episode reward: [(0, '28.327')] -[2024-09-01 08:15:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4882432. Throughput: 0: 220.7. Samples: 837740. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:15:32,165][00307] Avg episode reward: [(0, '28.352')] -[2024-09-01 08:15:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4886528. Throughput: 0: 209.9. Samples: 838172. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:15:37,156][00307] Avg episode reward: [(0, '27.471')] -[2024-09-01 08:15:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4894720. Throughput: 0: 234.1. Samples: 840140. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:15:42,167][00307] Avg episode reward: [(0, '27.857')] -[2024-09-01 08:15:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4894720. Throughput: 0: 240.1. Samples: 841116. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:15:47,156][00307] Avg episode reward: [(0, '27.932')] -[2024-09-01 08:15:47,401][16881] Updated weights for policy 0, policy_version 1196 (0.1556) -[2024-09-01 08:15:52,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4898816. Throughput: 0: 220.8. Samples: 841754. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:15:52,160][00307] Avg episode reward: [(0, '27.337')] -[2024-09-01 08:15:57,157][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4907008. Throughput: 0: 224.2. Samples: 843120. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:15:57,161][00307] Avg episode reward: [(0, '27.075')] -[2024-09-01 08:16:00,307][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001199_4911104.pth... -[2024-09-01 08:16:00,421][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001146_4694016.pth -[2024-09-01 08:16:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4911104. Throughput: 0: 231.7. Samples: 844742. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:02,162][00307] Avg episode reward: [(0, '26.680')] -[2024-09-01 08:16:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4915200. Throughput: 0: 227.9. Samples: 845218. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:07,165][00307] Avg episode reward: [(0, '26.801')] -[2024-09-01 08:16:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4919296. Throughput: 0: 212.1. Samples: 846318. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:12,156][00307] Avg episode reward: [(0, '26.199')] -[2024-09-01 08:16:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4923392. Throughput: 0: 227.0. Samples: 847954. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:17,159][00307] Avg episode reward: [(0, '26.007')] -[2024-09-01 08:16:22,162][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 902.5). Total num frames: 4927488. Throughput: 0: 228.8. Samples: 848472. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:22,167][00307] Avg episode reward: [(0, '26.101')] -[2024-09-01 08:16:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4931584. Throughput: 0: 206.9. Samples: 849452. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:27,163][00307] Avg episode reward: [(0, '25.418')] -[2024-09-01 08:16:32,154][00307] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4935680. Throughput: 0: 226.4. Samples: 851304. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:32,167][00307] Avg episode reward: [(0, '25.353')] -[2024-09-01 08:16:33,515][16881] Updated weights for policy 0, policy_version 1206 (0.1534) -[2024-09-01 08:16:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4939776. Throughput: 0: 226.4. Samples: 851944. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:37,156][00307] Avg episode reward: [(0, '25.695')] -[2024-09-01 08:16:42,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4943872. Throughput: 0: 222.4. Samples: 853126. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:42,160][00307] Avg episode reward: [(0, '25.426')] -[2024-09-01 08:16:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4947968. Throughput: 0: 216.3. Samples: 854474. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:47,163][00307] Avg episode reward: [(0, '25.608')] -[2024-09-01 08:16:52,154][00307] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4956160. Throughput: 0: 225.7. Samples: 855376. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:52,157][00307] Avg episode reward: [(0, '25.838')] -[2024-09-01 08:16:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4960256. Throughput: 0: 226.6. Samples: 856514. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:16:57,161][00307] Avg episode reward: [(0, '25.983')] -[2024-09-01 08:17:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4960256. Throughput: 0: 213.6. Samples: 857566. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:17:02,163][00307] Avg episode reward: [(0, '26.157')] -[2024-09-01 08:17:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4968448. Throughput: 0: 226.3. Samples: 858654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:17:07,157][00307] Avg episode reward: [(0, '26.873')] -[2024-09-01 08:17:12,158][00307] Fps is (10 sec: 1228.3, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4972544. Throughput: 0: 236.2. Samples: 860082. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:17:12,161][00307] Avg episode reward: [(0, '26.766')] -[2024-09-01 08:17:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4976640. Throughput: 0: 217.9. Samples: 861110. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:17:17,157][00307] Avg episode reward: [(0, '27.098')] -[2024-09-01 08:17:20,323][16881] Updated weights for policy 0, policy_version 1216 (0.0040) -[2024-09-01 08:17:22,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 4980736. Throughput: 0: 216.8. Samples: 861700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:17:22,166][00307] Avg episode reward: [(0, '25.844')] -[2024-09-01 08:17:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4984832. Throughput: 0: 229.4. Samples: 863448. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:17:27,162][00307] Avg episode reward: [(0, '25.258')] -[2024-09-01 08:17:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4988928. Throughput: 0: 226.9. Samples: 864684. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:17:32,157][00307] Avg episode reward: [(0, '25.675')] -[2024-09-01 08:17:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4993024. Throughput: 0: 213.3. Samples: 864976. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:17:37,166][00307] Avg episode reward: [(0, '25.159')] -[2024-09-01 08:17:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 902.5). Total num frames: 5001216. Throughput: 0: 228.4. Samples: 866792. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:17:42,164][00307] Avg episode reward: [(0, '25.251')] -[2024-09-01 08:17:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5005312. Throughput: 0: 231.5. Samples: 867982. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:17:47,164][00307] Avg episode reward: [(0, '25.868')] -[2024-09-01 08:17:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5009408. Throughput: 0: 223.4. Samples: 868706. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:17:52,159][00307] Avg episode reward: [(0, '25.770')] -[2024-09-01 08:17:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5013504. Throughput: 0: 216.6. Samples: 869830. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:17:57,156][00307] Avg episode reward: [(0, '25.952')] -[2024-09-01 08:18:00,297][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001225_5017600.pth... -[2024-09-01 08:18:00,411][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001173_4804608.pth -[2024-09-01 08:18:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5017600. Throughput: 0: 233.3. Samples: 871608. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:18:02,157][00307] Avg episode reward: [(0, '26.133')] -[2024-09-01 08:18:04,824][16881] Updated weights for policy 0, policy_version 1226 (0.1759) -[2024-09-01 08:18:07,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5021696. Throughput: 0: 227.9. Samples: 871954. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:18:07,161][00307] Avg episode reward: [(0, '26.608')] -[2024-09-01 08:18:09,017][16868] Signal inference workers to stop experience collection... (850 times) -[2024-09-01 08:18:09,112][16881] InferenceWorker_p0-w0: stopping experience collection (850 times) -[2024-09-01 08:18:10,182][16868] Signal inference workers to resume experience collection... (850 times) -[2024-09-01 08:18:10,182][16881] InferenceWorker_p0-w0: resuming experience collection (850 times) -[2024-09-01 08:18:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5025792. Throughput: 0: 215.5. Samples: 873144. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:18:12,157][00307] Avg episode reward: [(0, '26.495')] -[2024-09-01 08:18:17,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5029888. Throughput: 0: 220.8. Samples: 874622. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:18:17,165][00307] Avg episode reward: [(0, '26.770')] -[2024-09-01 08:18:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5033984. Throughput: 0: 232.4. Samples: 875432. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:18:22,165][00307] Avg episode reward: [(0, '26.704')] -[2024-09-01 08:18:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5038080. Throughput: 0: 221.5. Samples: 876758. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:18:27,157][00307] Avg episode reward: [(0, '27.241')] -[2024-09-01 08:18:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5042176. Throughput: 0: 220.0. Samples: 877882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:18:32,157][00307] Avg episode reward: [(0, '26.770')] -[2024-09-01 08:18:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5050368. Throughput: 0: 223.9. Samples: 878782. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:18:37,168][00307] Avg episode reward: [(0, '27.229')] -[2024-09-01 08:18:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5054464. Throughput: 0: 228.6. Samples: 880118. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:18:42,162][00307] Avg episode reward: [(0, '26.993')] -[2024-09-01 08:18:47,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5054464. Throughput: 0: 211.6. Samples: 881130. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:18:47,156][00307] Avg episode reward: [(0, '27.211')] -[2024-09-01 08:18:51,804][16881] Updated weights for policy 0, policy_version 1236 (0.2192) -[2024-09-01 08:18:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5062656. Throughput: 0: 227.0. Samples: 882168. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:18:52,160][00307] Avg episode reward: [(0, '27.376')] -[2024-09-01 08:18:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5066752. Throughput: 0: 229.5. Samples: 883472. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:18:57,162][00307] Avg episode reward: [(0, '28.441')] -[2024-09-01 08:19:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5070848. Throughput: 0: 225.0. Samples: 884746. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:19:02,157][00307] Avg episode reward: [(0, '28.364')] -[2024-09-01 08:19:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5074944. Throughput: 0: 218.4. Samples: 885260. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:19:07,158][00307] Avg episode reward: [(0, '28.372')] -[2024-09-01 08:19:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5079040. Throughput: 0: 225.2. Samples: 886894. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:19:12,159][00307] Avg episode reward: [(0, '27.253')] -[2024-09-01 08:19:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5083136. Throughput: 0: 232.7. Samples: 888352. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:19:17,157][00307] Avg episode reward: [(0, '26.693')] -[2024-09-01 08:19:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5087232. Throughput: 0: 221.4. Samples: 888744. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:19:22,157][00307] Avg episode reward: [(0, '26.220')] -[2024-09-01 08:19:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5091328. Throughput: 0: 222.0. Samples: 890108. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:19:27,164][00307] Avg episode reward: [(0, '26.628')] -[2024-09-01 08:19:32,156][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5099520. Throughput: 0: 232.6. Samples: 891596. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:19:32,160][00307] Avg episode reward: [(0, '26.731')] -[2024-09-01 08:19:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5099520. Throughput: 0: 228.2. Samples: 892438. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:19:37,159][00307] Avg episode reward: [(0, '26.780')] -[2024-09-01 08:19:37,229][16881] Updated weights for policy 0, policy_version 1246 (0.0549) -[2024-09-01 08:19:42,154][00307] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5103616. Throughput: 0: 221.6. Samples: 893442. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:19:42,156][00307] Avg episode reward: [(0, '26.569')] -[2024-09-01 08:19:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5111808. Throughput: 0: 225.1. Samples: 894874. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:19:47,159][00307] Avg episode reward: [(0, '25.241')] -[2024-09-01 08:19:52,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5115904. Throughput: 0: 229.2. Samples: 895572. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:19:52,168][00307] Avg episode reward: [(0, '25.242')] -[2024-09-01 08:19:57,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 5120000. Throughput: 0: 216.6. Samples: 896642. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:19:57,167][00307] Avg episode reward: [(0, '25.197')] -[2024-09-01 08:20:00,589][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001251_5124096.pth... -[2024-09-01 08:20:00,699][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001199_4911104.pth -[2024-09-01 08:20:02,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5124096. Throughput: 0: 218.0. Samples: 898162. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:20:02,156][00307] Avg episode reward: [(0, '25.046')] -[2024-09-01 08:20:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5128192. Throughput: 0: 223.8. Samples: 898814. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:20:07,157][00307] Avg episode reward: [(0, '25.202')] -[2024-09-01 08:20:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5132288. Throughput: 0: 225.6. Samples: 900262. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:20:12,168][00307] Avg episode reward: [(0, '25.202')] -[2024-09-01 08:20:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5136384. Throughput: 0: 219.2. Samples: 901460. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:20:17,159][00307] Avg episode reward: [(0, '25.278')] -[2024-09-01 08:20:22,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5140480. Throughput: 0: 216.9. Samples: 902200. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:20:22,160][00307] Avg episode reward: [(0, '24.903')] -[2024-09-01 08:20:22,760][16881] Updated weights for policy 0, policy_version 1256 (0.0554) -[2024-09-01 08:20:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5148672. Throughput: 0: 229.9. Samples: 903788. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:20:27,156][00307] Avg episode reward: [(0, '24.768')] -[2024-09-01 08:20:32,154][00307] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5148672. Throughput: 0: 219.9. Samples: 904770. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:20:32,165][00307] Avg episode reward: [(0, '24.475')] -[2024-09-01 08:20:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5156864. Throughput: 0: 221.1. Samples: 905520. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:20:37,160][00307] Avg episode reward: [(0, '23.751')] -[2024-09-01 08:20:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5160960. Throughput: 0: 227.8. Samples: 906890. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:20:42,162][00307] Avg episode reward: [(0, '23.965')] -[2024-09-01 08:20:47,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5165056. Throughput: 0: 224.1. Samples: 908246. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:20:47,159][00307] Avg episode reward: [(0, '24.159')] -[2024-09-01 08:20:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5169152. Throughput: 0: 224.5. Samples: 908918. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:20:52,157][00307] Avg episode reward: [(0, '23.889')] -[2024-09-01 08:20:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5173248. Throughput: 0: 219.7. Samples: 910148. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:20:57,157][00307] Avg episode reward: [(0, '23.379')] -[2024-09-01 08:21:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5177344. Throughput: 0: 233.0. Samples: 911944. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:21:02,157][00307] Avg episode reward: [(0, '23.654')] -[2024-09-01 08:21:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5181440. Throughput: 0: 222.0. Samples: 912192. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:21:07,164][00307] Avg episode reward: [(0, '23.440')] -[2024-09-01 08:21:09,304][16881] Updated weights for policy 0, policy_version 1266 (0.1280) -[2024-09-01 08:21:12,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 5185536. Throughput: 0: 219.1. Samples: 913646. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:21:12,166][00307] Avg episode reward: [(0, '23.529')] -[2024-09-01 08:21:17,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5193728. Throughput: 0: 226.2. Samples: 914948. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:21:17,158][00307] Avg episode reward: [(0, '23.876')] -[2024-09-01 08:21:22,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5193728. Throughput: 0: 230.0. Samples: 915868. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:21:22,156][00307] Avg episode reward: [(0, '24.388')] -[2024-09-01 08:21:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5197824. Throughput: 0: 223.1. Samples: 916928. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:21:27,156][00307] Avg episode reward: [(0, '24.648')] -[2024-09-01 08:21:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5206016. Throughput: 0: 224.9. Samples: 918366. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:21:32,162][00307] Avg episode reward: [(0, '23.984')] -[2024-09-01 08:21:37,157][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5210112. Throughput: 0: 226.3. Samples: 919102. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:21:37,159][00307] Avg episode reward: [(0, '24.077')] -[2024-09-01 08:21:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5214208. Throughput: 0: 223.0. Samples: 920184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:21:42,159][00307] Avg episode reward: [(0, '24.593')] -[2024-09-01 08:21:47,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5218304. Throughput: 0: 217.7. Samples: 921742. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:21:47,163][00307] Avg episode reward: [(0, '25.774')] -[2024-09-01 08:21:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5222400. Throughput: 0: 227.6. Samples: 922436. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:21:52,161][00307] Avg episode reward: [(0, '26.743')] -[2024-09-01 08:21:53,147][16881] Updated weights for policy 0, policy_version 1276 (0.0565) -[2024-09-01 08:21:57,024][16868] Signal inference workers to stop experience collection... (900 times) -[2024-09-01 08:21:57,127][16881] InferenceWorker_p0-w0: stopping experience collection (900 times) -[2024-09-01 08:21:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5226496. Throughput: 0: 229.2. Samples: 923960. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:21:57,156][00307] Avg episode reward: [(0, '27.104')] -[2024-09-01 08:21:58,940][16868] Signal inference workers to resume experience collection... (900 times) -[2024-09-01 08:21:58,941][16881] InferenceWorker_p0-w0: resuming experience collection (900 times) -[2024-09-01 08:22:02,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 5230592. Throughput: 0: 225.6. Samples: 925100. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:22:02,164][00307] Avg episode reward: [(0, '27.226')] -[2024-09-01 08:22:03,477][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001278_5234688.pth... -[2024-09-01 08:22:03,591][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001225_5017600.pth -[2024-09-01 08:22:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5234688. Throughput: 0: 221.7. Samples: 925846. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:22:07,158][00307] Avg episode reward: [(0, '26.683')] -[2024-09-01 08:22:12,154][00307] Fps is (10 sec: 1229.0, 60 sec: 955.8, 300 sec: 902.5). Total num frames: 5242880. Throughput: 0: 230.4. Samples: 927294. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:22:12,160][00307] Avg episode reward: [(0, '26.981')] -[2024-09-01 08:22:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5246976. Throughput: 0: 221.2. Samples: 928322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:22:17,158][00307] Avg episode reward: [(0, '26.697')] -[2024-09-01 08:22:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5251072. Throughput: 0: 225.0. Samples: 929226. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:22:22,159][00307] Avg episode reward: [(0, '26.868')] -[2024-09-01 08:22:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5255168. Throughput: 0: 231.3. Samples: 930594. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:22:27,161][00307] Avg episode reward: [(0, '25.856')] -[2024-09-01 08:22:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5259264. Throughput: 0: 226.7. Samples: 931944. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:22:32,157][00307] Avg episode reward: [(0, '25.682')] -[2024-09-01 08:22:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5263360. Throughput: 0: 219.0. Samples: 932292. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:22:37,156][00307] Avg episode reward: [(0, '26.006')] -[2024-09-01 08:22:39,300][16881] Updated weights for policy 0, policy_version 1286 (0.0537) -[2024-09-01 08:22:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5267456. Throughput: 0: 222.6. Samples: 933978. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:22:42,160][00307] Avg episode reward: [(0, '26.149')] -[2024-09-01 08:22:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5271552. Throughput: 0: 227.3. Samples: 935328. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:22:47,159][00307] Avg episode reward: [(0, '25.826')] -[2024-09-01 08:22:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5275648. Throughput: 0: 223.8. Samples: 935918. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:22:52,158][00307] Avg episode reward: [(0, '25.199')] -[2024-09-01 08:22:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5279744. Throughput: 0: 221.9. Samples: 937278. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:22:57,163][00307] Avg episode reward: [(0, '25.548')] -[2024-09-01 08:23:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 902.5). Total num frames: 5287936. Throughput: 0: 227.6. Samples: 938566. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:23:02,157][00307] Avg episode reward: [(0, '26.268')] -[2024-09-01 08:23:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5292032. Throughput: 0: 228.0. Samples: 939488. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:23:07,165][00307] Avg episode reward: [(0, '26.283')] -[2024-09-01 08:23:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5296128. Throughput: 0: 220.0. Samples: 940492. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:23:12,163][00307] Avg episode reward: [(0, '25.922')] -[2024-09-01 08:23:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5300224. Throughput: 0: 227.2. Samples: 942168. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:23:17,156][00307] Avg episode reward: [(0, '24.996')] -[2024-09-01 08:23:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5304320. Throughput: 0: 235.2. Samples: 942874. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:23:22,160][00307] Avg episode reward: [(0, '25.125')] -[2024-09-01 08:23:25,119][16881] Updated weights for policy 0, policy_version 1296 (0.1710) -[2024-09-01 08:23:27,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5308416. Throughput: 0: 219.2. Samples: 943844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:23:27,165][00307] Avg episode reward: [(0, '25.419')] -[2024-09-01 08:23:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5312512. Throughput: 0: 229.2. Samples: 945640. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:23:32,168][00307] Avg episode reward: [(0, '26.303')] -[2024-09-01 08:23:37,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5316608. Throughput: 0: 230.7. Samples: 946298. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:23:37,162][00307] Avg episode reward: [(0, '26.137')] -[2024-09-01 08:23:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5320704. Throughput: 0: 230.8. Samples: 947666. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:23:42,156][00307] Avg episode reward: [(0, '26.022')] -[2024-09-01 08:23:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5324800. Throughput: 0: 224.7. Samples: 948676. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:23:47,157][00307] Avg episode reward: [(0, '26.304')] -[2024-09-01 08:23:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5332992. Throughput: 0: 222.3. Samples: 949492. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:23:52,157][00307] Avg episode reward: [(0, '25.778')] -[2024-09-01 08:23:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5337088. Throughput: 0: 230.5. Samples: 950864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:23:57,158][00307] Avg episode reward: [(0, '25.662')] -[2024-09-01 08:24:01,311][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001304_5341184.pth... -[2024-09-01 08:24:01,450][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001251_5124096.pth -[2024-09-01 08:24:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5341184. Throughput: 0: 219.2. Samples: 952032. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:24:02,159][00307] Avg episode reward: [(0, '25.439')] -[2024-09-01 08:24:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5345280. Throughput: 0: 222.7. Samples: 952896. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:24:07,164][00307] Avg episode reward: [(0, '25.571')] -[2024-09-01 08:24:10,445][16881] Updated weights for policy 0, policy_version 1306 (0.1572) -[2024-09-01 08:24:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5349376. Throughput: 0: 229.5. Samples: 954170. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:24:12,157][00307] Avg episode reward: [(0, '25.381')] -[2024-09-01 08:24:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5353472. Throughput: 0: 223.4. Samples: 955692. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:24:17,159][00307] Avg episode reward: [(0, '25.388')] -[2024-09-01 08:24:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5357568. Throughput: 0: 214.9. Samples: 955968. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:24:22,159][00307] Avg episode reward: [(0, '25.617')] -[2024-09-01 08:24:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5361664. Throughput: 0: 225.6. Samples: 957816. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:24:27,159][00307] Avg episode reward: [(0, '26.029')] -[2024-09-01 08:24:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5369856. Throughput: 0: 229.7. Samples: 959014. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:24:32,159][00307] Avg episode reward: [(0, '26.387')] -[2024-09-01 08:24:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5369856. Throughput: 0: 227.0. Samples: 959708. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:24:37,157][00307] Avg episode reward: [(0, '26.654')] -[2024-09-01 08:24:42,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5373952. Throughput: 0: 227.4. Samples: 961096. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:24:42,163][00307] Avg episode reward: [(0, '26.820')] -[2024-09-01 08:24:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5382144. Throughput: 0: 232.1. Samples: 962478. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:24:47,159][00307] Avg episode reward: [(0, '27.341')] -[2024-09-01 08:24:52,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5386240. Throughput: 0: 227.7. Samples: 963142. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:24:52,164][00307] Avg episode reward: [(0, '27.123')] -[2024-09-01 08:24:56,822][16881] Updated weights for policy 0, policy_version 1316 (0.1663) -[2024-09-01 08:24:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5390336. Throughput: 0: 221.4. Samples: 964132. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:24:57,159][00307] Avg episode reward: [(0, '27.024')] -[2024-09-01 08:25:02,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5394432. Throughput: 0: 225.7. Samples: 965850. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:25:02,162][00307] Avg episode reward: [(0, '26.810')] -[2024-09-01 08:25:07,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5398528. Throughput: 0: 229.8. Samples: 966310. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:25:07,159][00307] Avg episode reward: [(0, '27.314')] -[2024-09-01 08:25:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5402624. Throughput: 0: 218.9. Samples: 967666. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:25:12,157][00307] Avg episode reward: [(0, '27.430')] -[2024-09-01 08:25:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5406720. Throughput: 0: 226.0. Samples: 969186. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:17,167][00307] Avg episode reward: [(0, '27.769')] -[2024-09-01 08:25:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5410816. Throughput: 0: 225.1. Samples: 969836. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:22,165][00307] Avg episode reward: [(0, '27.930')] -[2024-09-01 08:25:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5414912. Throughput: 0: 228.3. Samples: 971368. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:27,159][00307] Avg episode reward: [(0, '28.679')] -[2024-09-01 08:25:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5419008. Throughput: 0: 219.7. Samples: 972366. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:32,166][00307] Avg episode reward: [(0, '28.669')] -[2024-09-01 08:25:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5427200. Throughput: 0: 222.4. Samples: 973148. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:37,162][00307] Avg episode reward: [(0, '28.809')] -[2024-09-01 08:25:41,397][16881] Updated weights for policy 0, policy_version 1326 (0.1556) -[2024-09-01 08:25:42,157][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5431296. Throughput: 0: 229.4. Samples: 974454. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:42,160][00307] Avg episode reward: [(0, '28.852')] -[2024-09-01 08:25:45,348][16868] Signal inference workers to stop experience collection... (950 times) -[2024-09-01 08:25:45,438][16881] InferenceWorker_p0-w0: stopping experience collection (950 times) -[2024-09-01 08:25:46,418][16868] Signal inference workers to resume experience collection... (950 times) -[2024-09-01 08:25:46,418][16881] InferenceWorker_p0-w0: resuming experience collection (950 times) -[2024-09-01 08:25:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5435392. Throughput: 0: 216.6. Samples: 975596. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:47,159][00307] Avg episode reward: [(0, '29.395')] -[2024-09-01 08:25:52,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5439488. Throughput: 0: 225.0. Samples: 976434. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:52,157][00307] Avg episode reward: [(0, '29.081')] -[2024-09-01 08:25:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5443584. Throughput: 0: 221.2. Samples: 977618. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:25:57,156][00307] Avg episode reward: [(0, '28.677')] -[2024-09-01 08:25:58,812][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001330_5447680.pth... -[2024-09-01 08:25:58,917][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001278_5234688.pth -[2024-09-01 08:26:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5447680. Throughput: 0: 223.6. Samples: 979246. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:02,157][00307] Avg episode reward: [(0, '29.137')] -[2024-09-01 08:26:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5451776. Throughput: 0: 215.7. Samples: 979542. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:07,157][00307] Avg episode reward: [(0, '28.682')] -[2024-09-01 08:26:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5455872. Throughput: 0: 213.3. Samples: 980968. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:12,157][00307] Avg episode reward: [(0, '28.221')] -[2024-09-01 08:26:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5464064. Throughput: 0: 225.1. Samples: 982496. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:17,162][00307] Avg episode reward: [(0, '27.820')] -[2024-09-01 08:26:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5464064. Throughput: 0: 224.4. Samples: 983246. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:22,157][00307] Avg episode reward: [(0, '27.966')] -[2024-09-01 08:26:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5468160. Throughput: 0: 222.7. Samples: 984476. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:27,162][00307] Avg episode reward: [(0, '28.084')] -[2024-09-01 08:26:27,406][16881] Updated weights for policy 0, policy_version 1336 (0.1045) -[2024-09-01 08:26:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5476352. Throughput: 0: 224.4. Samples: 985694. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:32,162][00307] Avg episode reward: [(0, '28.084')] -[2024-09-01 08:26:37,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5480448. Throughput: 0: 227.7. Samples: 986682. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:37,160][00307] Avg episode reward: [(0, '28.135')] -[2024-09-01 08:26:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5484544. Throughput: 0: 224.9. Samples: 987740. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:26:42,163][00307] Avg episode reward: [(0, '28.676')] -[2024-09-01 08:26:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5488640. Throughput: 0: 224.3. Samples: 989338. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:26:47,159][00307] Avg episode reward: [(0, '27.802')] -[2024-09-01 08:26:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5492736. Throughput: 0: 228.9. Samples: 989844. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:26:52,156][00307] Avg episode reward: [(0, '28.243')] -[2024-09-01 08:26:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5496832. Throughput: 0: 229.9. Samples: 991314. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:26:57,161][00307] Avg episode reward: [(0, '28.127')] -[2024-09-01 08:27:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5500928. Throughput: 0: 227.3. Samples: 992726. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:27:02,157][00307] Avg episode reward: [(0, '28.522')] -[2024-09-01 08:27:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5505024. Throughput: 0: 225.3. Samples: 993386. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:27:07,167][00307] Avg episode reward: [(0, '29.001')] -[2024-09-01 08:27:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5509120. Throughput: 0: 233.3. Samples: 994976. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:27:12,156][00307] Avg episode reward: [(0, '29.162')] -[2024-09-01 08:27:12,556][16881] Updated weights for policy 0, policy_version 1346 (0.1088) -[2024-09-01 08:27:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5513216. Throughput: 0: 229.1. Samples: 996002. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:27:17,164][00307] Avg episode reward: [(0, '28.541')] -[2024-09-01 08:27:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5521408. Throughput: 0: 226.7. Samples: 996882. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:22,158][00307] Avg episode reward: [(0, '27.279')] -[2024-09-01 08:27:27,155][00307] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5525504. Throughput: 0: 231.0. Samples: 998134. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:27,162][00307] Avg episode reward: [(0, '27.148')] -[2024-09-01 08:27:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5529600. Throughput: 0: 224.0. Samples: 999420. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:32,160][00307] Avg episode reward: [(0, '26.802')] -[2024-09-01 08:27:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5533696. Throughput: 0: 228.0. Samples: 1000106. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:37,157][00307] Avg episode reward: [(0, '26.834')] -[2024-09-01 08:27:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5537792. Throughput: 0: 227.1. Samples: 1001534. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:42,157][00307] Avg episode reward: [(0, '26.639')] -[2024-09-01 08:27:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5541888. Throughput: 0: 231.6. Samples: 1003148. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:47,157][00307] Avg episode reward: [(0, '26.013')] -[2024-09-01 08:27:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5545984. Throughput: 0: 222.1. Samples: 1003382. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:52,159][00307] Avg episode reward: [(0, '25.419')] -[2024-09-01 08:27:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5550080. Throughput: 0: 222.5. Samples: 1004990. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:27:57,163][00307] Avg episode reward: [(0, '25.953')] -[2024-09-01 08:27:57,854][16881] Updated weights for policy 0, policy_version 1356 (0.0547) -[2024-09-01 08:28:01,733][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001357_5558272.pth... -[2024-09-01 08:28:01,843][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001304_5341184.pth -[2024-09-01 08:28:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5558272. Throughput: 0: 229.5. Samples: 1006330. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:28:02,162][00307] Avg episode reward: [(0, '26.240')] -[2024-09-01 08:28:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5558272. Throughput: 0: 226.8. Samples: 1007088. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:28:07,157][00307] Avg episode reward: [(0, '26.574')] -[2024-09-01 08:28:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5562368. Throughput: 0: 224.7. Samples: 1008246. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:28:12,156][00307] Avg episode reward: [(0, '25.925')] -[2024-09-01 08:28:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5570560. Throughput: 0: 227.1. Samples: 1009638. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:28:17,163][00307] Avg episode reward: [(0, '25.890')] -[2024-09-01 08:28:22,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5574656. Throughput: 0: 227.1. Samples: 1010324. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:28:22,160][00307] Avg episode reward: [(0, '26.420')] -[2024-09-01 08:28:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5578752. Throughput: 0: 218.0. Samples: 1011346. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:28:27,160][00307] Avg episode reward: [(0, '26.781')] -[2024-09-01 08:28:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5582848. Throughput: 0: 220.7. Samples: 1013080. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:28:32,156][00307] Avg episode reward: [(0, '26.098')] -[2024-09-01 08:28:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5586944. Throughput: 0: 229.3. Samples: 1013702. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:28:37,157][00307] Avg episode reward: [(0, '26.038')] -[2024-09-01 08:28:42,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5591040. Throughput: 0: 222.9. Samples: 1015022. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:28:42,160][00307] Avg episode reward: [(0, '26.242')] -[2024-09-01 08:28:44,212][16881] Updated weights for policy 0, policy_version 1366 (0.0547) -[2024-09-01 08:28:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5595136. Throughput: 0: 224.1. Samples: 1016416. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:28:47,157][00307] Avg episode reward: [(0, '25.920')] -[2024-09-01 08:28:52,154][00307] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5603328. Throughput: 0: 223.5. Samples: 1017146. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:28:52,157][00307] Avg episode reward: [(0, '25.528')] -[2024-09-01 08:28:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5607424. Throughput: 0: 226.6. Samples: 1018442. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:28:57,158][00307] Avg episode reward: [(0, '25.528')] -[2024-09-01 08:29:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5607424. Throughput: 0: 219.5. Samples: 1019514. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:02,156][00307] Avg episode reward: [(0, '25.521')] -[2024-09-01 08:29:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5615616. Throughput: 0: 226.8. Samples: 1020528. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:07,161][00307] Avg episode reward: [(0, '25.033')] -[2024-09-01 08:29:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5619712. Throughput: 0: 234.6. Samples: 1021902. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:12,159][00307] Avg episode reward: [(0, '25.269')] -[2024-09-01 08:29:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5623808. Throughput: 0: 222.7. Samples: 1023100. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:17,164][00307] Avg episode reward: [(0, '25.579')] -[2024-09-01 08:29:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5627904. Throughput: 0: 219.7. Samples: 1023590. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:22,161][00307] Avg episode reward: [(0, '24.544')] -[2024-09-01 08:29:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5632000. Throughput: 0: 230.0. Samples: 1025370. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:27,159][00307] Avg episode reward: [(0, '24.712')] -[2024-09-01 08:29:27,974][16881] Updated weights for policy 0, policy_version 1376 (0.0547) -[2024-09-01 08:29:30,641][16868] Signal inference workers to stop experience collection... (1000 times) -[2024-09-01 08:29:30,719][16881] InferenceWorker_p0-w0: stopping experience collection (1000 times) -[2024-09-01 08:29:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5636096. Throughput: 0: 227.6. Samples: 1026658. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:32,156][00307] Avg episode reward: [(0, '24.307')] -[2024-09-01 08:29:32,852][16868] Signal inference workers to resume experience collection... (1000 times) -[2024-09-01 08:29:32,854][16881] InferenceWorker_p0-w0: resuming experience collection (1000 times) -[2024-09-01 08:29:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5640192. Throughput: 0: 222.3. Samples: 1027150. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:37,156][00307] Avg episode reward: [(0, '24.707')] -[2024-09-01 08:29:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5644288. Throughput: 0: 224.6. Samples: 1028548. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:42,157][00307] Avg episode reward: [(0, '26.056')] -[2024-09-01 08:29:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5652480. Throughput: 0: 234.0. Samples: 1030042. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:29:47,156][00307] Avg episode reward: [(0, '26.967')] -[2024-09-01 08:29:52,160][00307] Fps is (10 sec: 1228.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5656576. Throughput: 0: 225.9. Samples: 1030694. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:29:52,167][00307] Avg episode reward: [(0, '27.081')] -[2024-09-01 08:29:57,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5660672. Throughput: 0: 218.1. Samples: 1031718. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:29:57,161][00307] Avg episode reward: [(0, '26.390')] -[2024-09-01 08:30:00,210][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001383_5664768.pth... -[2024-09-01 08:30:00,316][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001330_5447680.pth -[2024-09-01 08:30:02,154][00307] Fps is (10 sec: 819.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5664768. Throughput: 0: 230.4. Samples: 1033466. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:30:02,156][00307] Avg episode reward: [(0, '25.892')] -[2024-09-01 08:30:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5668864. Throughput: 0: 228.4. Samples: 1033868. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:30:07,159][00307] Avg episode reward: [(0, '25.118')] -[2024-09-01 08:30:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5672960. Throughput: 0: 215.4. Samples: 1035062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:30:12,159][00307] Avg episode reward: [(0, '24.985')] -[2024-09-01 08:30:14,823][16881] Updated weights for policy 0, policy_version 1386 (0.1068) -[2024-09-01 08:30:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5677056. Throughput: 0: 224.1. Samples: 1036744. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:17,157][00307] Avg episode reward: [(0, '25.040')] -[2024-09-01 08:30:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5681152. Throughput: 0: 223.3. Samples: 1037200. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:22,165][00307] Avg episode reward: [(0, '24.680')] -[2024-09-01 08:30:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5685248. Throughput: 0: 223.7. Samples: 1038614. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:27,157][00307] Avg episode reward: [(0, '24.267')] -[2024-09-01 08:30:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5689344. Throughput: 0: 218.8. Samples: 1039888. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:32,165][00307] Avg episode reward: [(0, '25.468')] -[2024-09-01 08:30:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5697536. Throughput: 0: 218.0. Samples: 1040502. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:37,163][00307] Avg episode reward: [(0, '24.855')] -[2024-09-01 08:30:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5701632. Throughput: 0: 227.9. Samples: 1041972. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:42,158][00307] Avg episode reward: [(0, '24.547')] -[2024-09-01 08:30:47,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5705728. Throughput: 0: 211.5. Samples: 1042986. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:47,161][00307] Avg episode reward: [(0, '24.547')] -[2024-09-01 08:30:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 5709824. Throughput: 0: 225.0. Samples: 1043994. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:52,161][00307] Avg episode reward: [(0, '25.115')] -[2024-09-01 08:30:57,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5713920. Throughput: 0: 230.8. Samples: 1045446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:30:57,157][00307] Avg episode reward: [(0, '25.582')] -[2024-09-01 08:30:59,197][16881] Updated weights for policy 0, policy_version 1396 (0.1530) -[2024-09-01 08:31:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5718016. Throughput: 0: 221.7. Samples: 1046722. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:31:02,157][00307] Avg episode reward: [(0, '25.229')] -[2024-09-01 08:31:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5722112. Throughput: 0: 220.9. Samples: 1047142. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:31:07,157][00307] Avg episode reward: [(0, '24.664')] -[2024-09-01 08:31:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5726208. Throughput: 0: 223.0. Samples: 1048648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:31:12,156][00307] Avg episode reward: [(0, '24.959')] -[2024-09-01 08:31:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5730304. Throughput: 0: 229.6. Samples: 1050220. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:31:17,157][00307] Avg episode reward: [(0, '25.096')] -[2024-09-01 08:31:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5734400. Throughput: 0: 228.0. Samples: 1050760. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:22,160][00307] Avg episode reward: [(0, '25.409')] -[2024-09-01 08:31:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5738496. Throughput: 0: 229.1. Samples: 1052280. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:27,158][00307] Avg episode reward: [(0, '24.825')] -[2024-09-01 08:31:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5746688. Throughput: 0: 237.2. Samples: 1053660. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:32,163][00307] Avg episode reward: [(0, '25.714')] -[2024-09-01 08:31:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5750784. Throughput: 0: 230.3. Samples: 1054358. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:37,157][00307] Avg episode reward: [(0, '25.374')] -[2024-09-01 08:31:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5754880. Throughput: 0: 222.0. Samples: 1055434. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:42,156][00307] Avg episode reward: [(0, '25.066')] -[2024-09-01 08:31:45,583][16881] Updated weights for policy 0, policy_version 1406 (0.1046) -[2024-09-01 08:31:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5758976. Throughput: 0: 230.2. Samples: 1057082. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:47,162][00307] Avg episode reward: [(0, '24.036')] -[2024-09-01 08:31:52,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5763072. Throughput: 0: 232.2. Samples: 1057592. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:31:52,162][00307] Avg episode reward: [(0, '24.345')] -[2024-09-01 08:31:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5767168. Throughput: 0: 229.0. Samples: 1058954. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:31:57,158][00307] Avg episode reward: [(0, '24.345')] -[2024-09-01 08:31:59,538][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001409_5771264.pth... -[2024-09-01 08:31:59,659][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001357_5558272.pth -[2024-09-01 08:32:02,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5771264. Throughput: 0: 229.1. Samples: 1060530. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:02,163][00307] Avg episode reward: [(0, '25.005')] -[2024-09-01 08:32:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5775360. Throughput: 0: 230.8. Samples: 1061146. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:07,167][00307] Avg episode reward: [(0, '25.767')] -[2024-09-01 08:32:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5779456. Throughput: 0: 225.4. Samples: 1062422. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:12,157][00307] Avg episode reward: [(0, '25.920')] -[2024-09-01 08:32:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5783552. Throughput: 0: 221.3. Samples: 1063618. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:32:17,166][00307] Avg episode reward: [(0, '26.097')] -[2024-09-01 08:32:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5791744. Throughput: 0: 224.5. Samples: 1064462. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:22,159][00307] Avg episode reward: [(0, '25.936')] -[2024-09-01 08:32:27,156][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5795840. Throughput: 0: 228.2. Samples: 1065702. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:27,162][00307] Avg episode reward: [(0, '26.155')] -[2024-09-01 08:32:30,903][16881] Updated weights for policy 0, policy_version 1416 (0.1544) -[2024-09-01 08:32:32,161][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5799936. Throughput: 0: 220.4. Samples: 1067000. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:32,167][00307] Avg episode reward: [(0, '26.524')] -[2024-09-01 08:32:37,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5804032. Throughput: 0: 225.1. Samples: 1067720. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:37,162][00307] Avg episode reward: [(0, '26.503')] -[2024-09-01 08:32:42,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5808128. Throughput: 0: 226.9. Samples: 1069164. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:42,163][00307] Avg episode reward: [(0, '26.448')] -[2024-09-01 08:32:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5812224. Throughput: 0: 225.9. Samples: 1070696. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:32:47,163][00307] Avg episode reward: [(0, '26.954')] -[2024-09-01 08:32:52,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5816320. Throughput: 0: 218.7. Samples: 1070988. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:32:52,161][00307] Avg episode reward: [(0, '26.944')] -[2024-09-01 08:32:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5820416. Throughput: 0: 227.5. Samples: 1072658. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:32:57,163][00307] Avg episode reward: [(0, '27.748')] -[2024-09-01 08:33:02,154][00307] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5828608. Throughput: 0: 230.1. Samples: 1073972. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:02,165][00307] Avg episode reward: [(0, '27.604')] -[2024-09-01 08:33:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5828608. Throughput: 0: 228.6. Samples: 1074748. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:07,157][00307] Avg episode reward: [(0, '26.960')] -[2024-09-01 08:33:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5832704. Throughput: 0: 227.2. Samples: 1075926. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:12,157][00307] Avg episode reward: [(0, '27.568')] -[2024-09-01 08:33:16,149][16881] Updated weights for policy 0, policy_version 1426 (0.0520) -[2024-09-01 08:33:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5840896. Throughput: 0: 229.9. Samples: 1077344. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 08:33:17,160][00307] Avg episode reward: [(0, '27.430')] -[2024-09-01 08:33:18,547][16868] Signal inference workers to stop experience collection... (1050 times) -[2024-09-01 08:33:18,596][16881] InferenceWorker_p0-w0: stopping experience collection (1050 times) -[2024-09-01 08:33:20,000][16868] Signal inference workers to resume experience collection... (1050 times) -[2024-09-01 08:33:20,001][16881] InferenceWorker_p0-w0: resuming experience collection (1050 times) -[2024-09-01 08:33:22,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5844992. Throughput: 0: 229.2. Samples: 1078034. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 08:33:22,161][00307] Avg episode reward: [(0, '27.402')] -[2024-09-01 08:33:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5849088. Throughput: 0: 220.2. Samples: 1079072. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 08:33:27,157][00307] Avg episode reward: [(0, '27.338')] -[2024-09-01 08:33:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 5853184. Throughput: 0: 225.2. Samples: 1080830. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 08:33:32,156][00307] Avg episode reward: [(0, '28.499')] -[2024-09-01 08:33:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5857280. Throughput: 0: 233.0. Samples: 1081474. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:37,163][00307] Avg episode reward: [(0, '28.142')] -[2024-09-01 08:33:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5861376. Throughput: 0: 224.0. Samples: 1082740. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:42,160][00307] Avg episode reward: [(0, '28.131')] -[2024-09-01 08:33:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5865472. Throughput: 0: 225.5. Samples: 1084118. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:47,156][00307] Avg episode reward: [(0, '26.980')] -[2024-09-01 08:33:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5869568. Throughput: 0: 220.1. Samples: 1084652. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:52,165][00307] Avg episode reward: [(0, '26.636')] -[2024-09-01 08:33:57,156][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5877760. Throughput: 0: 227.0. Samples: 1086140. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:33:57,161][00307] Avg episode reward: [(0, '26.545')] -[2024-09-01 08:34:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5877760. Throughput: 0: 218.0. Samples: 1087156. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:34:02,164][00307] Avg episode reward: [(0, '26.400')] -[2024-09-01 08:34:02,436][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001436_5881856.pth... -[2024-09-01 08:34:02,452][16881] Updated weights for policy 0, policy_version 1436 (0.1694) -[2024-09-01 08:34:02,546][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001383_5664768.pth -[2024-09-01 08:34:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5885952. Throughput: 0: 225.3. Samples: 1088172. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 08:34:07,162][00307] Avg episode reward: [(0, '26.315')] -[2024-09-01 08:34:12,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5890048. Throughput: 0: 231.5. Samples: 1089488. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:34:12,159][00307] Avg episode reward: [(0, '26.421')] -[2024-09-01 08:34:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5894144. Throughput: 0: 217.4. Samples: 1090614. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 08:34:17,162][00307] Avg episode reward: [(0, '26.219')] -[2024-09-01 08:34:22,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5898240. Throughput: 0: 215.0. Samples: 1091148. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:34:22,159][00307] Avg episode reward: [(0, '26.259')] -[2024-09-01 08:34:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5902336. Throughput: 0: 224.5. Samples: 1092844. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:34:27,161][00307] Avg episode reward: [(0, '26.358')] -[2024-09-01 08:34:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5906432. Throughput: 0: 225.7. Samples: 1094274. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:34:32,157][00307] Avg episode reward: [(0, '25.771')] -[2024-09-01 08:34:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5910528. Throughput: 0: 219.0. Samples: 1094506. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:34:37,160][00307] Avg episode reward: [(0, '25.503')] -[2024-09-01 08:34:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5914624. Throughput: 0: 224.9. Samples: 1096262. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:34:42,166][00307] Avg episode reward: [(0, '25.503')] -[2024-09-01 08:34:46,260][16881] Updated weights for policy 0, policy_version 1446 (0.1712) -[2024-09-01 08:34:47,156][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5922816. Throughput: 0: 230.6. Samples: 1097534. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:34:47,159][00307] Avg episode reward: [(0, '25.259')] -[2024-09-01 08:34:52,158][00307] Fps is (10 sec: 1228.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5926912. Throughput: 0: 223.4. Samples: 1098228. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:34:52,168][00307] Avg episode reward: [(0, '26.005')] -[2024-09-01 08:34:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5931008. Throughput: 0: 219.1. Samples: 1099348. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:34:57,157][00307] Avg episode reward: [(0, '26.093')] -[2024-09-01 08:35:02,154][00307] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5935104. Throughput: 0: 232.8. Samples: 1101092. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:35:02,157][00307] Avg episode reward: [(0, '25.631')] -[2024-09-01 08:35:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5939200. Throughput: 0: 231.7. Samples: 1101574. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:35:07,160][00307] Avg episode reward: [(0, '25.703')] -[2024-09-01 08:35:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5943296. Throughput: 0: 217.2. Samples: 1102618. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:35:12,160][00307] Avg episode reward: [(0, '25.995')] -[2024-09-01 08:35:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5947392. Throughput: 0: 225.3. Samples: 1104412. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:35:17,166][00307] Avg episode reward: [(0, '25.869')] -[2024-09-01 08:35:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5951488. Throughput: 0: 231.6. Samples: 1104928. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:35:22,157][00307] Avg episode reward: [(0, '26.415')] -[2024-09-01 08:35:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5955584. Throughput: 0: 222.4. Samples: 1106272. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:35:27,162][00307] Avg episode reward: [(0, '26.415')] -[2024-09-01 08:35:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5959680. Throughput: 0: 219.5. Samples: 1107410. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:35:32,168][00307] Avg episode reward: [(0, '26.716')] -[2024-09-01 08:35:32,394][16881] Updated weights for policy 0, policy_version 1456 (0.1021) -[2024-09-01 08:35:37,156][00307] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5967872. Throughput: 0: 226.1. Samples: 1108402. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:35:37,162][00307] Avg episode reward: [(0, '26.427')] -[2024-09-01 08:35:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5971968. Throughput: 0: 225.6. Samples: 1109498. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:35:42,164][00307] Avg episode reward: [(0, '26.501')] -[2024-09-01 08:35:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5976064. Throughput: 0: 212.1. Samples: 1110636. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:35:47,165][00307] Avg episode reward: [(0, '26.725')] -[2024-09-01 08:35:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5980160. Throughput: 0: 222.1. Samples: 1111570. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:35:52,157][00307] Avg episode reward: [(0, '26.518')] -[2024-09-01 08:35:57,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5984256. Throughput: 0: 231.9. Samples: 1113056. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:35:57,165][00307] Avg episode reward: [(0, '26.426')] -[2024-09-01 08:35:59,315][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001462_5988352.pth... -[2024-09-01 08:35:59,465][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001409_5771264.pth -[2024-09-01 08:36:02,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5988352. Throughput: 0: 218.6. Samples: 1114248. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:36:02,158][00307] Avg episode reward: [(0, '26.678')] -[2024-09-01 08:36:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5992448. Throughput: 0: 216.5. Samples: 1114670. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:36:07,157][00307] Avg episode reward: [(0, '26.946')] -[2024-09-01 08:36:12,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5996544. Throughput: 0: 221.5. Samples: 1116240. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:36:12,164][00307] Avg episode reward: [(0, '28.005')] -[2024-09-01 08:36:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6000640. Throughput: 0: 226.3. Samples: 1117594. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:36:17,157][00307] Avg episode reward: [(0, '27.343')] -[2024-09-01 08:36:17,846][16881] Updated weights for policy 0, policy_version 1466 (0.2028) -[2024-09-01 08:36:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6004736. Throughput: 0: 215.3. Samples: 1118090. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:36:22,162][00307] Avg episode reward: [(0, '27.425')] -[2024-09-01 08:36:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6012928. Throughput: 0: 223.6. Samples: 1119562. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:36:27,159][00307] Avg episode reward: [(0, '27.554')] -[2024-09-01 08:36:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6017024. Throughput: 0: 234.2. Samples: 1121176. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:36:32,156][00307] Avg episode reward: [(0, '27.462')] -[2024-09-01 08:36:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6021120. Throughput: 0: 223.7. Samples: 1121636. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:36:37,159][00307] Avg episode reward: [(0, '27.247')] -[2024-09-01 08:36:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6025216. Throughput: 0: 213.0. Samples: 1122640. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:36:42,161][00307] Avg episode reward: [(0, '27.100')] -[2024-09-01 08:36:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6029312. Throughput: 0: 227.6. Samples: 1124490. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:36:47,159][00307] Avg episode reward: [(0, '27.776')] -[2024-09-01 08:36:52,157][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6033408. Throughput: 0: 232.8. Samples: 1125146. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:36:52,161][00307] Avg episode reward: [(0, '26.443')] -[2024-09-01 08:36:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6037504. Throughput: 0: 220.8. Samples: 1126178. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:36:57,160][00307] Avg episode reward: [(0, '26.224')] -[2024-09-01 08:37:02,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6041600. Throughput: 0: 227.5. Samples: 1127832. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:02,161][00307] Avg episode reward: [(0, '25.796')] -[2024-09-01 08:37:03,186][16881] Updated weights for policy 0, policy_version 1476 (0.0562) -[2024-09-01 08:37:05,606][16868] Signal inference workers to stop experience collection... (1100 times) -[2024-09-01 08:37:05,653][16881] InferenceWorker_p0-w0: stopping experience collection (1100 times) -[2024-09-01 08:37:07,073][16868] Signal inference workers to resume experience collection... (1100 times) -[2024-09-01 08:37:07,075][16881] InferenceWorker_p0-w0: resuming experience collection (1100 times) -[2024-09-01 08:37:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 6049792. Throughput: 0: 233.6. Samples: 1128600. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:07,157][00307] Avg episode reward: [(0, '25.113')] -[2024-09-01 08:37:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6049792. Throughput: 0: 225.7. Samples: 1129718. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:12,161][00307] Avg episode reward: [(0, '25.113')] -[2024-09-01 08:37:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6053888. Throughput: 0: 216.8. Samples: 1130932. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:17,162][00307] Avg episode reward: [(0, '25.382')] -[2024-09-01 08:37:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6062080. Throughput: 0: 229.9. Samples: 1131982. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:22,159][00307] Avg episode reward: [(0, '24.769')] -[2024-09-01 08:37:27,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6066176. Throughput: 0: 236.3. Samples: 1133274. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:27,158][00307] Avg episode reward: [(0, '24.498')] -[2024-09-01 08:37:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6070272. Throughput: 0: 218.1. Samples: 1134304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:37:32,163][00307] Avg episode reward: [(0, '24.687')] -[2024-09-01 08:37:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6074368. Throughput: 0: 220.0. Samples: 1135044. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:37:37,156][00307] Avg episode reward: [(0, '24.894')] -[2024-09-01 08:37:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6078464. Throughput: 0: 230.7. Samples: 1136558. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:42,157][00307] Avg episode reward: [(0, '24.868')] -[2024-09-01 08:37:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6082560. Throughput: 0: 225.4. Samples: 1137974. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:47,164][00307] Avg episode reward: [(0, '24.631')] -[2024-09-01 08:37:49,276][16881] Updated weights for policy 0, policy_version 1486 (0.1789) -[2024-09-01 08:37:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6086656. Throughput: 0: 216.0. Samples: 1138320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:52,166][00307] Avg episode reward: [(0, '24.669')] -[2024-09-01 08:37:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6090752. Throughput: 0: 228.4. Samples: 1139998. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:37:57,166][00307] Avg episode reward: [(0, '24.695')] -[2024-09-01 08:38:01,578][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001489_6098944.pth... -[2024-09-01 08:38:01,688][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001436_5881856.pth -[2024-09-01 08:38:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 6098944. Throughput: 0: 230.3. Samples: 1141294. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:02,159][00307] Avg episode reward: [(0, '24.637')] -[2024-09-01 08:38:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 6098944. Throughput: 0: 222.8. Samples: 1142006. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:07,158][00307] Avg episode reward: [(0, '25.062')] -[2024-09-01 08:38:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6107136. Throughput: 0: 220.6. Samples: 1143200. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:12,165][00307] Avg episode reward: [(0, '25.373')] -[2024-09-01 08:38:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6111232. Throughput: 0: 232.2. Samples: 1144752. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:17,164][00307] Avg episode reward: [(0, '24.903')] -[2024-09-01 08:38:22,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6115328. Throughput: 0: 227.3. Samples: 1145274. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:22,160][00307] Avg episode reward: [(0, '24.903')] -[2024-09-01 08:38:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6119424. Throughput: 0: 216.7. Samples: 1146308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:38:27,156][00307] Avg episode reward: [(0, '24.785')] -[2024-09-01 08:38:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6123520. Throughput: 0: 226.5. Samples: 1148166. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:38:32,163][00307] Avg episode reward: [(0, '25.958')] -[2024-09-01 08:38:34,514][16881] Updated weights for policy 0, policy_version 1496 (0.1054) -[2024-09-01 08:38:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6127616. Throughput: 0: 229.2. Samples: 1148632. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:37,160][00307] Avg episode reward: [(0, '26.480')] -[2024-09-01 08:38:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6131712. Throughput: 0: 218.4. Samples: 1149824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:38:42,163][00307] Avg episode reward: [(0, '26.368')] -[2024-09-01 08:38:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6135808. Throughput: 0: 222.1. Samples: 1151290. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:38:47,159][00307] Avg episode reward: [(0, '27.072')] -[2024-09-01 08:38:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6139904. Throughput: 0: 225.1. Samples: 1152134. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:38:52,157][00307] Avg episode reward: [(0, '26.934')] -[2024-09-01 08:38:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6144000. Throughput: 0: 226.9. Samples: 1153412. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:38:57,160][00307] Avg episode reward: [(0, '26.406')] -[2024-09-01 08:39:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6148096. Throughput: 0: 218.8. Samples: 1154600. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:39:02,165][00307] Avg episode reward: [(0, '26.864')] -[2024-09-01 08:39:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6156288. Throughput: 0: 228.8. Samples: 1155568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:39:07,157][00307] Avg episode reward: [(0, '27.391')] -[2024-09-01 08:39:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6160384. Throughput: 0: 229.9. Samples: 1156654. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:12,158][00307] Avg episode reward: [(0, '27.613')] -[2024-09-01 08:39:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6164480. Throughput: 0: 215.2. Samples: 1157848. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:17,157][00307] Avg episode reward: [(0, '27.827')] -[2024-09-01 08:39:21,218][16881] Updated weights for policy 0, policy_version 1506 (0.2084) -[2024-09-01 08:39:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6168576. Throughput: 0: 220.2. Samples: 1158542. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:22,159][00307] Avg episode reward: [(0, '28.158')] -[2024-09-01 08:39:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6172672. Throughput: 0: 230.1. Samples: 1160180. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:27,167][00307] Avg episode reward: [(0, '28.577')] -[2024-09-01 08:39:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6176768. Throughput: 0: 229.4. Samples: 1161614. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:39:32,157][00307] Avg episode reward: [(0, '28.714')] -[2024-09-01 08:39:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6180864. Throughput: 0: 219.2. Samples: 1161996. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:39:37,161][00307] Avg episode reward: [(0, '29.132')] -[2024-09-01 08:39:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6184960. Throughput: 0: 224.2. Samples: 1163502. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:42,163][00307] Avg episode reward: [(0, '29.265')] -[2024-09-01 08:39:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6193152. Throughput: 0: 230.2. Samples: 1164958. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:47,165][00307] Avg episode reward: [(0, '29.844')] -[2024-09-01 08:39:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6197248. Throughput: 0: 224.7. Samples: 1165678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:39:52,161][00307] Avg episode reward: [(0, '29.780')] -[2024-09-01 08:39:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6201344. Throughput: 0: 226.6. Samples: 1166850. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:39:57,162][00307] Avg episode reward: [(0, '29.282')] -[2024-09-01 08:40:00,607][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001515_6205440.pth... -[2024-09-01 08:40:00,728][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001462_5988352.pth -[2024-09-01 08:40:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6205440. Throughput: 0: 237.1. Samples: 1168516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:40:02,168][00307] Avg episode reward: [(0, '29.435')] -[2024-09-01 08:40:04,648][16881] Updated weights for policy 0, policy_version 1516 (0.0533) -[2024-09-01 08:40:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6209536. Throughput: 0: 233.8. Samples: 1169064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:40:07,156][00307] Avg episode reward: [(0, '29.435')] -[2024-09-01 08:40:12,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6213632. Throughput: 0: 220.2. Samples: 1170090. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:40:12,168][00307] Avg episode reward: [(0, '29.972')] -[2024-09-01 08:40:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6217728. Throughput: 0: 227.3. Samples: 1171844. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:40:17,156][00307] Avg episode reward: [(0, '29.742')] -[2024-09-01 08:40:22,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6221824. Throughput: 0: 234.1. Samples: 1172532. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:40:22,157][00307] Avg episode reward: [(0, '29.563')] -[2024-09-01 08:40:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6225920. Throughput: 0: 228.0. Samples: 1173764. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:40:27,158][00307] Avg episode reward: [(0, '29.599')] -[2024-09-01 08:40:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6230016. Throughput: 0: 225.7. Samples: 1175116. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:40:32,163][00307] Avg episode reward: [(0, '29.438')] -[2024-09-01 08:40:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6238208. Throughput: 0: 229.8. Samples: 1176020. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:40:37,156][00307] Avg episode reward: [(0, '30.237')] -[2024-09-01 08:40:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6242304. Throughput: 0: 229.8. Samples: 1177192. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:40:42,162][00307] Avg episode reward: [(0, '29.290')] -[2024-09-01 08:40:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6246400. Throughput: 0: 215.5. Samples: 1178214. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:40:47,160][00307] Avg episode reward: [(0, '28.358')] -[2024-09-01 08:40:50,956][16881] Updated weights for policy 0, policy_version 1526 (0.1114) -[2024-09-01 08:40:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6250496. Throughput: 0: 226.0. Samples: 1179234. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:40:52,160][00307] Avg episode reward: [(0, '27.599')] -[2024-09-01 08:40:53,201][16868] Signal inference workers to stop experience collection... (1150 times) -[2024-09-01 08:40:53,257][16881] InferenceWorker_p0-w0: stopping experience collection (1150 times) -[2024-09-01 08:40:54,715][16868] Signal inference workers to resume experience collection... (1150 times) -[2024-09-01 08:40:54,715][16881] InferenceWorker_p0-w0: resuming experience collection (1150 times) -[2024-09-01 08:40:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6254592. Throughput: 0: 237.6. Samples: 1180780. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:40:57,161][00307] Avg episode reward: [(0, '27.966')] -[2024-09-01 08:41:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6258688. Throughput: 0: 222.7. Samples: 1181864. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:41:02,161][00307] Avg episode reward: [(0, '28.152')] -[2024-09-01 08:41:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6262784. Throughput: 0: 216.3. Samples: 1182264. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:41:07,157][00307] Avg episode reward: [(0, '28.150')] -[2024-09-01 08:41:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6266880. Throughput: 0: 228.1. Samples: 1184028. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:41:12,157][00307] Avg episode reward: [(0, '27.818')] -[2024-09-01 08:41:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6270976. Throughput: 0: 226.2. Samples: 1185294. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:41:17,159][00307] Avg episode reward: [(0, '27.066')] -[2024-09-01 08:41:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6275072. Throughput: 0: 217.2. Samples: 1185792. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:41:22,165][00307] Avg episode reward: [(0, '27.359')] -[2024-09-01 08:41:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6283264. Throughput: 0: 225.4. Samples: 1187334. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:41:27,157][00307] Avg episode reward: [(0, '27.426')] -[2024-09-01 08:41:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6287360. Throughput: 0: 236.8. Samples: 1188870. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:41:32,158][00307] Avg episode reward: [(0, '26.750')] -[2024-09-01 08:41:35,977][16881] Updated weights for policy 0, policy_version 1536 (0.0550) -[2024-09-01 08:41:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6291456. Throughput: 0: 225.7. Samples: 1189390. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:41:37,161][00307] Avg episode reward: [(0, '26.334')] -[2024-09-01 08:41:42,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6295552. Throughput: 0: 212.7. Samples: 1190352. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:41:42,168][00307] Avg episode reward: [(0, '25.972')] -[2024-09-01 08:41:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6299648. Throughput: 0: 231.2. Samples: 1192266. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:41:47,160][00307] Avg episode reward: [(0, '25.189')] -[2024-09-01 08:41:52,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6303744. Throughput: 0: 235.6. Samples: 1192864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:41:52,162][00307] Avg episode reward: [(0, '25.381')] -[2024-09-01 08:41:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6307840. Throughput: 0: 219.1. Samples: 1193886. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:41:57,160][00307] Avg episode reward: [(0, '25.471')] -[2024-09-01 08:41:59,100][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001541_6311936.pth... -[2024-09-01 08:41:59,215][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001489_6098944.pth -[2024-09-01 08:42:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6311936. Throughput: 0: 226.2. Samples: 1195474. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:42:02,164][00307] Avg episode reward: [(0, '26.324')] -[2024-09-01 08:42:07,155][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 6320128. Throughput: 0: 234.3. Samples: 1196336. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:42:07,162][00307] Avg episode reward: [(0, '26.850')] -[2024-09-01 08:42:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6320128. Throughput: 0: 226.1. Samples: 1197508. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:42:12,156][00307] Avg episode reward: [(0, '27.236')] -[2024-09-01 08:42:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6324224. Throughput: 0: 215.3. Samples: 1198560. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:42:17,165][00307] Avg episode reward: [(0, '27.062')] -[2024-09-01 08:42:21,160][16881] Updated weights for policy 0, policy_version 1546 (0.0037) -[2024-09-01 08:42:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6332416. Throughput: 0: 226.3. Samples: 1199572. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:42:22,159][00307] Avg episode reward: [(0, '27.052')] -[2024-09-01 08:42:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6336512. Throughput: 0: 229.1. Samples: 1200662. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:42:27,159][00307] Avg episode reward: [(0, '27.052')] -[2024-09-01 08:42:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6340608. Throughput: 0: 213.4. Samples: 1201868. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:42:32,161][00307] Avg episode reward: [(0, '26.605')] -[2024-09-01 08:42:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6344704. Throughput: 0: 217.4. Samples: 1202648. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:42:37,157][00307] Avg episode reward: [(0, '27.051')] -[2024-09-01 08:42:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6348800. Throughput: 0: 231.4. Samples: 1204300. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:42:42,166][00307] Avg episode reward: [(0, '27.364')] -[2024-09-01 08:42:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6352896. Throughput: 0: 217.2. Samples: 1205248. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:42:47,161][00307] Avg episode reward: [(0, '27.833')] -[2024-09-01 08:42:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6356992. Throughput: 0: 209.2. Samples: 1205750. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:42:52,157][00307] Avg episode reward: [(0, '28.136')] -[2024-09-01 08:42:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6361088. Throughput: 0: 225.0. Samples: 1207634. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:42:57,164][00307] Avg episode reward: [(0, '28.280')] -[2024-09-01 08:43:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6365184. Throughput: 0: 228.2. Samples: 1208828. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:43:02,157][00307] Avg episode reward: [(0, '28.681')] -[2024-09-01 08:43:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6369280. Throughput: 0: 217.2. Samples: 1209348. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:43:07,166][00307] Avg episode reward: [(0, '29.701')] -[2024-09-01 08:43:08,991][16881] Updated weights for policy 0, policy_version 1556 (0.1488) -[2024-09-01 08:43:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6373376. Throughput: 0: 219.3. Samples: 1210532. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:43:12,165][00307] Avg episode reward: [(0, '29.609')] -[2024-09-01 08:43:17,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6377472. Throughput: 0: 221.8. Samples: 1211848. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:43:17,165][00307] Avg episode reward: [(0, '29.565')] -[2024-09-01 08:43:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6381568. Throughput: 0: 222.2. Samples: 1212648. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:43:22,159][00307] Avg episode reward: [(0, '30.102')] -[2024-09-01 08:43:27,154][00307] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6385664. Throughput: 0: 208.8. Samples: 1213696. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:43:27,162][00307] Avg episode reward: [(0, '30.665')] -[2024-09-01 08:43:31,306][16868] Saving new best policy, reward=30.665! -[2024-09-01 08:43:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6393856. Throughput: 0: 221.1. Samples: 1215198. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:43:32,161][00307] Avg episode reward: [(0, '30.472')] -[2024-09-01 08:43:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6397952. Throughput: 0: 225.0. Samples: 1215874. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:43:37,160][00307] Avg episode reward: [(0, '30.357')] -[2024-09-01 08:43:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6402048. Throughput: 0: 206.5. Samples: 1216926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:43:42,157][00307] Avg episode reward: [(0, '29.948')] -[2024-09-01 08:43:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6406144. Throughput: 0: 212.9. Samples: 1218408. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:43:47,159][00307] Avg episode reward: [(0, '30.434')] -[2024-09-01 08:43:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6410240. Throughput: 0: 215.7. Samples: 1219054. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:43:52,162][00307] Avg episode reward: [(0, '30.075')] -[2024-09-01 08:43:54,289][16881] Updated weights for policy 0, policy_version 1566 (0.0741) -[2024-09-01 08:43:57,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6414336. Throughput: 0: 219.2. Samples: 1220396. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:43:57,160][00307] Avg episode reward: [(0, '29.781')] -[2024-09-01 08:43:59,530][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001567_6418432.pth... -[2024-09-01 08:43:59,651][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001515_6205440.pth -[2024-09-01 08:44:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6418432. Throughput: 0: 219.4. Samples: 1221718. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:44:02,159][00307] Avg episode reward: [(0, '29.849')] -[2024-09-01 08:44:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6422528. Throughput: 0: 217.0. Samples: 1222412. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:07,166][00307] Avg episode reward: [(0, '29.727')] -[2024-09-01 08:44:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6430720. Throughput: 0: 230.7. Samples: 1224078. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:12,160][00307] Avg episode reward: [(0, '29.687')] -[2024-09-01 08:44:17,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6430720. Throughput: 0: 219.7. Samples: 1225084. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:17,162][00307] Avg episode reward: [(0, '28.373')] -[2024-09-01 08:44:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6434816. Throughput: 0: 218.8. Samples: 1225718. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:22,164][00307] Avg episode reward: [(0, '27.523')] -[2024-09-01 08:44:27,154][00307] Fps is (10 sec: 1229.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6443008. Throughput: 0: 227.5. Samples: 1227164. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:27,169][00307] Avg episode reward: [(0, '27.257')] -[2024-09-01 08:44:32,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6447104. Throughput: 0: 224.6. Samples: 1228516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:44:32,162][00307] Avg episode reward: [(0, '27.595')] -[2024-09-01 08:44:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6451200. Throughput: 0: 225.6. Samples: 1229206. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:44:37,158][00307] Avg episode reward: [(0, '26.519')] -[2024-09-01 08:44:40,357][16881] Updated weights for policy 0, policy_version 1576 (0.1745) -[2024-09-01 08:44:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6455296. Throughput: 0: 224.6. Samples: 1230502. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:42,160][00307] Avg episode reward: [(0, '24.841')] -[2024-09-01 08:44:42,690][16868] Signal inference workers to stop experience collection... (1200 times) -[2024-09-01 08:44:42,735][16881] InferenceWorker_p0-w0: stopping experience collection (1200 times) -[2024-09-01 08:44:44,199][16868] Signal inference workers to resume experience collection... (1200 times) -[2024-09-01 08:44:44,201][16881] InferenceWorker_p0-w0: resuming experience collection (1200 times) -[2024-09-01 08:44:47,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6459392. Throughput: 0: 232.8. Samples: 1232196. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:47,176][00307] Avg episode reward: [(0, '24.690')] -[2024-09-01 08:44:52,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6463488. Throughput: 0: 223.9. Samples: 1232490. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:52,159][00307] Avg episode reward: [(0, '23.535')] -[2024-09-01 08:44:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6467584. Throughput: 0: 218.2. Samples: 1233898. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:44:57,157][00307] Avg episode reward: [(0, '22.892')] -[2024-09-01 08:45:02,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6471680. Throughput: 0: 230.2. Samples: 1235440. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:45:02,165][00307] Avg episode reward: [(0, '22.842')] -[2024-09-01 08:45:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6475776. Throughput: 0: 229.2. Samples: 1236034. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:45:07,157][00307] Avg episode reward: [(0, '22.773')] -[2024-09-01 08:45:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6479872. Throughput: 0: 222.0. Samples: 1237154. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:45:12,163][00307] Avg episode reward: [(0, '22.731')] -[2024-09-01 08:45:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 902.5). Total num frames: 6488064. Throughput: 0: 221.1. Samples: 1238466. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:45:17,156][00307] Avg episode reward: [(0, '22.168')] -[2024-09-01 08:45:22,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6492160. Throughput: 0: 229.6. Samples: 1239536. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:45:22,160][00307] Avg episode reward: [(0, '21.978')] -[2024-09-01 08:45:26,299][16881] Updated weights for policy 0, policy_version 1586 (0.0550) -[2024-09-01 08:45:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6496256. Throughput: 0: 222.8. Samples: 1240530. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:45:27,157][00307] Avg episode reward: [(0, '21.727')] -[2024-09-01 08:45:32,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6500352. Throughput: 0: 211.6. Samples: 1241716. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:45:32,157][00307] Avg episode reward: [(0, '22.590')] -[2024-09-01 08:45:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6504448. Throughput: 0: 225.8. Samples: 1242652. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:45:37,156][00307] Avg episode reward: [(0, '23.572')] -[2024-09-01 08:45:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6508544. Throughput: 0: 227.9. Samples: 1244152. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:45:42,160][00307] Avg episode reward: [(0, '23.180')] -[2024-09-01 08:45:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6512640. Throughput: 0: 215.3. Samples: 1245128. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:45:47,158][00307] Avg episode reward: [(0, '23.203')] -[2024-09-01 08:45:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6516736. Throughput: 0: 218.0. Samples: 1245844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:45:52,159][00307] Avg episode reward: [(0, '23.898')] -[2024-09-01 08:45:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6520832. Throughput: 0: 235.8. Samples: 1247766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:45:57,156][00307] Avg episode reward: [(0, '23.624')] -[2024-09-01 08:46:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6524928. Throughput: 0: 230.7. Samples: 1248848. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:46:02,157][00307] Avg episode reward: [(0, '23.610')] -[2024-09-01 08:46:03,358][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001594_6529024.pth... -[2024-09-01 08:46:03,516][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001541_6311936.pth -[2024-09-01 08:46:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6529024. Throughput: 0: 216.0. Samples: 1249254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:07,164][00307] Avg episode reward: [(0, '23.475')] -[2024-09-01 08:46:11,790][16881] Updated weights for policy 0, policy_version 1596 (0.0542) -[2024-09-01 08:46:12,154][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6537216. Throughput: 0: 229.3. Samples: 1250848. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:46:12,157][00307] Avg episode reward: [(0, '23.202')] -[2024-09-01 08:46:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6541312. Throughput: 0: 233.1. Samples: 1252206. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:46:17,157][00307] Avg episode reward: [(0, '23.417')] -[2024-09-01 08:46:22,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6545408. Throughput: 0: 226.8. Samples: 1252858. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:22,162][00307] Avg episode reward: [(0, '23.812')] -[2024-09-01 08:46:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6549504. Throughput: 0: 216.2. Samples: 1253880. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:27,156][00307] Avg episode reward: [(0, '23.456')] -[2024-09-01 08:46:32,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6553600. Throughput: 0: 237.4. Samples: 1255810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:46:32,164][00307] Avg episode reward: [(0, '24.050')] -[2024-09-01 08:46:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6557696. Throughput: 0: 229.7. Samples: 1256180. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:46:37,160][00307] Avg episode reward: [(0, '24.006')] -[2024-09-01 08:46:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6561792. Throughput: 0: 210.4. Samples: 1257234. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:42,163][00307] Avg episode reward: [(0, '23.386')] -[2024-09-01 08:46:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6565888. Throughput: 0: 222.0. Samples: 1258838. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:47,157][00307] Avg episode reward: [(0, '24.410')] -[2024-09-01 08:46:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6569984. Throughput: 0: 227.5. Samples: 1259490. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:52,157][00307] Avg episode reward: [(0, '24.968')] -[2024-09-01 08:46:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6574080. Throughput: 0: 219.9. Samples: 1260742. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:46:57,160][00307] Avg episode reward: [(0, '25.122')] -[2024-09-01 08:46:58,661][16881] Updated weights for policy 0, policy_version 1606 (0.1061) -[2024-09-01 08:47:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 6578176. Throughput: 0: 219.2. Samples: 1262070. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:47:02,166][00307] Avg episode reward: [(0, '25.174')] -[2024-09-01 08:47:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6586368. Throughput: 0: 227.4. Samples: 1263092. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:47:07,159][00307] Avg episode reward: [(0, '25.848')] -[2024-09-01 08:47:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6590464. Throughput: 0: 227.3. Samples: 1264108. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:47:12,159][00307] Avg episode reward: [(0, '25.841')] -[2024-09-01 08:47:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6594560. Throughput: 0: 210.2. Samples: 1265270. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:17,159][00307] Avg episode reward: [(0, '25.836')] -[2024-09-01 08:47:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6598656. Throughput: 0: 221.8. Samples: 1266162. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:22,156][00307] Avg episode reward: [(0, '25.791')] -[2024-09-01 08:47:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6602752. Throughput: 0: 234.0. Samples: 1267764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:27,161][00307] Avg episode reward: [(0, '25.188')] -[2024-09-01 08:47:32,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6606848. Throughput: 0: 220.6. Samples: 1268764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:32,159][00307] Avg episode reward: [(0, '25.708')] -[2024-09-01 08:47:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6610944. Throughput: 0: 219.4. Samples: 1269364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:37,166][00307] Avg episode reward: [(0, '25.453')] -[2024-09-01 08:47:42,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6615040. Throughput: 0: 233.2. Samples: 1271236. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:42,158][00307] Avg episode reward: [(0, '24.766')] -[2024-09-01 08:47:42,917][16881] Updated weights for policy 0, policy_version 1616 (0.1067) -[2024-09-01 08:47:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6619136. Throughput: 0: 226.1. Samples: 1272244. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:47,158][00307] Avg episode reward: [(0, '24.978')] -[2024-09-01 08:47:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6623232. Throughput: 0: 214.0. Samples: 1272720. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:47:52,157][00307] Avg episode reward: [(0, '24.717')] -[2024-09-01 08:47:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6631424. Throughput: 0: 227.0. Samples: 1274324. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:47:57,156][00307] Avg episode reward: [(0, '25.686')] -[2024-09-01 08:48:00,942][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001620_6635520.pth... -[2024-09-01 08:48:01,061][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001567_6418432.pth -[2024-09-01 08:48:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6635520. Throughput: 0: 231.2. Samples: 1275676. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:02,156][00307] Avg episode reward: [(0, '25.731')] -[2024-09-01 08:48:07,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6639616. Throughput: 0: 226.9. Samples: 1276374. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:07,163][00307] Avg episode reward: [(0, '25.793')] -[2024-09-01 08:48:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6643712. Throughput: 0: 213.2. Samples: 1277358. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:12,158][00307] Avg episode reward: [(0, '26.712')] -[2024-09-01 08:48:17,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6647808. Throughput: 0: 228.2. Samples: 1279034. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:17,159][00307] Avg episode reward: [(0, '26.479')] -[2024-09-01 08:48:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6651904. Throughput: 0: 226.4. Samples: 1279554. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:48:22,160][00307] Avg episode reward: [(0, '26.479')] -[2024-09-01 08:48:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6656000. Throughput: 0: 207.8. Samples: 1280588. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:48:27,170][00307] Avg episode reward: [(0, '27.024')] -[2024-09-01 08:48:30,033][16881] Updated weights for policy 0, policy_version 1626 (0.0057) -[2024-09-01 08:48:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6660096. Throughput: 0: 225.5. Samples: 1282390. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:32,159][00307] Avg episode reward: [(0, '26.765')] -[2024-09-01 08:48:32,325][16868] Signal inference workers to stop experience collection... (1250 times) -[2024-09-01 08:48:32,372][16881] InferenceWorker_p0-w0: stopping experience collection (1250 times) -[2024-09-01 08:48:33,830][16868] Signal inference workers to resume experience collection... (1250 times) -[2024-09-01 08:48:33,831][16881] InferenceWorker_p0-w0: resuming experience collection (1250 times) -[2024-09-01 08:48:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6664192. Throughput: 0: 228.8. Samples: 1283016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:37,162][00307] Avg episode reward: [(0, '27.459')] -[2024-09-01 08:48:42,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6668288. Throughput: 0: 218.9. Samples: 1284176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:48:42,162][00307] Avg episode reward: [(0, '27.557')] -[2024-09-01 08:48:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6672384. Throughput: 0: 217.8. Samples: 1285478. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:48:47,159][00307] Avg episode reward: [(0, '27.617')] -[2024-09-01 08:48:52,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6676480. Throughput: 0: 219.4. Samples: 1286246. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:52,166][00307] Avg episode reward: [(0, '27.878')] -[2024-09-01 08:48:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6680576. Throughput: 0: 229.5. Samples: 1287684. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:48:57,160][00307] Avg episode reward: [(0, '27.885')] -[2024-09-01 08:49:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6684672. Throughput: 0: 214.1. Samples: 1288668. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:49:02,163][00307] Avg episode reward: [(0, '27.637')] -[2024-09-01 08:49:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6692864. Throughput: 0: 222.4. Samples: 1289560. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:49:07,169][00307] Avg episode reward: [(0, '27.862')] -[2024-09-01 08:49:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6696960. Throughput: 0: 228.8. Samples: 1290886. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:49:12,159][00307] Avg episode reward: [(0, '27.411')] -[2024-09-01 08:49:16,664][16881] Updated weights for policy 0, policy_version 1636 (0.1227) -[2024-09-01 08:49:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6701056. Throughput: 0: 209.6. Samples: 1291824. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:17,159][00307] Avg episode reward: [(0, '27.465')] -[2024-09-01 08:49:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6705152. Throughput: 0: 211.0. Samples: 1292510. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:22,156][00307] Avg episode reward: [(0, '27.348')] -[2024-09-01 08:49:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6709248. Throughput: 0: 222.9. Samples: 1294206. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:27,165][00307] Avg episode reward: [(0, '26.909')] -[2024-09-01 08:49:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6713344. Throughput: 0: 227.5. Samples: 1295716. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:32,159][00307] Avg episode reward: [(0, '27.014')] -[2024-09-01 08:49:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6717440. Throughput: 0: 217.6. Samples: 1296040. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:37,159][00307] Avg episode reward: [(0, '26.579')] -[2024-09-01 08:49:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6721536. Throughput: 0: 219.8. Samples: 1297574. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:42,156][00307] Avg episode reward: [(0, '26.436')] -[2024-09-01 08:49:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6729728. Throughput: 0: 227.3. Samples: 1298896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:47,166][00307] Avg episode reward: [(0, '26.344')] -[2024-09-01 08:49:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6729728. Throughput: 0: 224.8. Samples: 1299678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:49:52,157][00307] Avg episode reward: [(0, '26.583')] -[2024-09-01 08:49:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6733824. Throughput: 0: 220.7. Samples: 1300818. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 08:49:57,168][00307] Avg episode reward: [(0, '26.761')] -[2024-09-01 08:50:01,485][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001646_6742016.pth... -[2024-09-01 08:50:01,499][16881] Updated weights for policy 0, policy_version 1646 (0.0565) -[2024-09-01 08:50:01,591][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001594_6529024.pth -[2024-09-01 08:50:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6742016. Throughput: 0: 231.9. Samples: 1302260. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:50:02,157][00307] Avg episode reward: [(0, '26.461')] -[2024-09-01 08:50:07,159][00307] Fps is (10 sec: 1228.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6746112. Throughput: 0: 234.9. Samples: 1303080. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:50:07,168][00307] Avg episode reward: [(0, '26.263')] -[2024-09-01 08:50:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6750208. Throughput: 0: 219.4. Samples: 1304078. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:50:12,156][00307] Avg episode reward: [(0, '26.086')] -[2024-09-01 08:50:17,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6754304. Throughput: 0: 220.9. Samples: 1305656. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:50:17,157][00307] Avg episode reward: [(0, '26.018')] -[2024-09-01 08:50:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6758400. Throughput: 0: 228.1. Samples: 1306304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:50:22,158][00307] Avg episode reward: [(0, '26.051')] -[2024-09-01 08:50:27,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 6762496. Throughput: 0: 227.1. Samples: 1307792. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:50:27,162][00307] Avg episode reward: [(0, '25.522')] -[2024-09-01 08:50:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6766592. Throughput: 0: 226.4. Samples: 1309086. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:50:32,162][00307] Avg episode reward: [(0, '25.406')] -[2024-09-01 08:50:37,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6770688. Throughput: 0: 226.0. Samples: 1309850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:50:37,157][00307] Avg episode reward: [(0, '25.398')] -[2024-09-01 08:50:42,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6778880. Throughput: 0: 234.0. Samples: 1311346. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:50:42,163][00307] Avg episode reward: [(0, '25.157')] -[2024-09-01 08:50:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6778880. Throughput: 0: 224.7. Samples: 1312370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 08:50:47,157][00307] Avg episode reward: [(0, '25.118')] -[2024-09-01 08:50:47,247][16881] Updated weights for policy 0, policy_version 1656 (0.0042) -[2024-09-01 08:50:52,157][00307] Fps is (10 sec: 819.0, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6787072. Throughput: 0: 227.3. Samples: 1313308. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:50:52,162][00307] Avg episode reward: [(0, '25.613')] -[2024-09-01 08:50:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6791168. Throughput: 0: 235.6. Samples: 1314682. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:50:57,162][00307] Avg episode reward: [(0, '25.157')] -[2024-09-01 08:51:02,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6795264. Throughput: 0: 229.9. Samples: 1316000. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:51:02,162][00307] Avg episode reward: [(0, '25.307')] -[2024-09-01 08:51:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6799360. Throughput: 0: 224.8. Samples: 1316420. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:51:07,156][00307] Avg episode reward: [(0, '25.238')] -[2024-09-01 08:51:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6803456. Throughput: 0: 225.9. Samples: 1317958. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 08:51:12,164][00307] Avg episode reward: [(0, '25.484')] -[2024-09-01 08:51:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6807552. Throughput: 0: 230.0. Samples: 1319438. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:51:17,157][00307] Avg episode reward: [(0, '25.784')] -[2024-09-01 08:51:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6811648. Throughput: 0: 225.8. Samples: 1320010. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:51:22,158][00307] Avg episode reward: [(0, '25.520')] -[2024-09-01 08:51:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6815744. Throughput: 0: 223.2. Samples: 1321392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:51:27,156][00307] Avg episode reward: [(0, '25.556')] -[2024-09-01 08:51:31,579][16881] Updated weights for policy 0, policy_version 1666 (0.1518) -[2024-09-01 08:51:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6823936. Throughput: 0: 230.5. Samples: 1322742. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:51:32,156][00307] Avg episode reward: [(0, '26.328')] -[2024-09-01 08:51:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6828032. Throughput: 0: 227.6. Samples: 1323548. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:51:37,159][00307] Avg episode reward: [(0, '26.567')] -[2024-09-01 08:51:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6832128. Throughput: 0: 218.8. Samples: 1324530. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:51:42,157][00307] Avg episode reward: [(0, '26.567')] -[2024-09-01 08:51:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6836224. Throughput: 0: 224.6. Samples: 1326106. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:51:47,157][00307] Avg episode reward: [(0, '26.488')] -[2024-09-01 08:51:52,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6840320. Throughput: 0: 233.6. Samples: 1326934. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:51:52,162][00307] Avg episode reward: [(0, '27.235')] -[2024-09-01 08:51:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6844416. Throughput: 0: 222.1. Samples: 1327954. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:51:57,162][00307] Avg episode reward: [(0, '27.008')] -[2024-09-01 08:51:59,480][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001672_6848512.pth... -[2024-09-01 08:51:59,591][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001620_6635520.pth -[2024-09-01 08:52:02,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6848512. Throughput: 0: 224.3. Samples: 1329530. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:52:02,164][00307] Avg episode reward: [(0, '27.904')] -[2024-09-01 08:52:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6852608. Throughput: 0: 222.0. Samples: 1330000. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:52:07,166][00307] Avg episode reward: [(0, '27.945')] -[2024-09-01 08:52:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6856704. Throughput: 0: 225.9. Samples: 1331558. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:52:12,157][00307] Avg episode reward: [(0, '27.783')] -[2024-09-01 08:52:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6860800. Throughput: 0: 221.6. Samples: 1332714. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:52:17,157][00307] Avg episode reward: [(0, '27.656')] -[2024-09-01 08:52:18,479][16881] Updated weights for policy 0, policy_version 1676 (0.1559) -[2024-09-01 08:52:20,847][16868] Signal inference workers to stop experience collection... (1300 times) -[2024-09-01 08:52:20,933][16881] InferenceWorker_p0-w0: stopping experience collection (1300 times) -[2024-09-01 08:52:21,981][16868] Signal inference workers to resume experience collection... (1300 times) -[2024-09-01 08:52:21,982][16881] InferenceWorker_p0-w0: resuming experience collection (1300 times) -[2024-09-01 08:52:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6868992. Throughput: 0: 218.6. Samples: 1333386. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:52:22,156][00307] Avg episode reward: [(0, '27.572')] -[2024-09-01 08:52:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6873088. Throughput: 0: 228.7. Samples: 1334822. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:27,157][00307] Avg episode reward: [(0, '27.593')] -[2024-09-01 08:52:32,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6877184. Throughput: 0: 218.5. Samples: 1335938. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:32,166][00307] Avg episode reward: [(0, '28.491')] -[2024-09-01 08:52:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6881280. Throughput: 0: 219.3. Samples: 1336804. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:37,157][00307] Avg episode reward: [(0, '29.110')] -[2024-09-01 08:52:42,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6885376. Throughput: 0: 227.2. Samples: 1338176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:42,157][00307] Avg episode reward: [(0, '28.543')] -[2024-09-01 08:52:47,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 6889472. Throughput: 0: 220.9. Samples: 1339470. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:47,164][00307] Avg episode reward: [(0, '28.999')] -[2024-09-01 08:52:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6893568. Throughput: 0: 221.2. Samples: 1339956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:52,165][00307] Avg episode reward: [(0, '28.445')] -[2024-09-01 08:52:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6897664. Throughput: 0: 223.2. Samples: 1341602. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:52:57,166][00307] Avg episode reward: [(0, '28.781')] -[2024-09-01 08:53:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6901760. Throughput: 0: 229.3. Samples: 1343032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:53:02,156][00307] Avg episode reward: [(0, '28.715')] -[2024-09-01 08:53:02,563][16881] Updated weights for policy 0, policy_version 1686 (0.1225) -[2024-09-01 08:53:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6905856. Throughput: 0: 227.6. Samples: 1343626. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:53:07,157][00307] Avg episode reward: [(0, '28.097')] -[2024-09-01 08:53:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6909952. Throughput: 0: 222.8. Samples: 1344846. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:53:12,167][00307] Avg episode reward: [(0, '29.014')] -[2024-09-01 08:53:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6918144. Throughput: 0: 230.9. Samples: 1346328. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 08:53:17,157][00307] Avg episode reward: [(0, '28.554')] -[2024-09-01 08:53:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6922240. Throughput: 0: 226.3. Samples: 1346986. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:53:22,159][00307] Avg episode reward: [(0, '27.578')] -[2024-09-01 08:53:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6926336. Throughput: 0: 217.5. Samples: 1347964. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:53:27,163][00307] Avg episode reward: [(0, '27.886')] -[2024-09-01 08:53:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 6930432. Throughput: 0: 227.3. Samples: 1349696. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:53:32,156][00307] Avg episode reward: [(0, '27.797')] -[2024-09-01 08:53:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6934528. Throughput: 0: 230.6. Samples: 1350334. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:53:37,158][00307] Avg episode reward: [(0, '27.870')] -[2024-09-01 08:53:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6938624. Throughput: 0: 220.6. Samples: 1351530. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:53:42,158][00307] Avg episode reward: [(0, '27.858')] -[2024-09-01 08:53:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6942720. Throughput: 0: 221.6. Samples: 1353004. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:53:47,167][00307] Avg episode reward: [(0, '27.415')] -[2024-09-01 08:53:48,654][16881] Updated weights for policy 0, policy_version 1696 (0.2216) -[2024-09-01 08:53:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 6950912. Throughput: 0: 221.6. Samples: 1353598. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:53:52,157][00307] Avg episode reward: [(0, '27.837')] -[2024-09-01 08:53:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 6955008. Throughput: 0: 227.0. Samples: 1355060. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:53:57,164][00307] Avg episode reward: [(0, '28.811')] -[2024-09-01 08:54:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6955008. Throughput: 0: 218.0. Samples: 1356140. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:02,164][00307] Avg episode reward: [(0, '28.144')] -[2024-09-01 08:54:02,648][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001699_6959104.pth... -[2024-09-01 08:54:02,763][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001646_6742016.pth -[2024-09-01 08:54:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6963200. Throughput: 0: 225.3. Samples: 1357124. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:07,157][00307] Avg episode reward: [(0, '27.896')] -[2024-09-01 08:54:12,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 6967296. Throughput: 0: 228.0. Samples: 1358222. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:12,166][00307] Avg episode reward: [(0, '27.263')] -[2024-09-01 08:54:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6971392. Throughput: 0: 211.7. Samples: 1359224. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:54:17,162][00307] Avg episode reward: [(0, '26.568')] -[2024-09-01 08:54:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 6971392. Throughput: 0: 212.6. Samples: 1359902. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:54:22,159][00307] Avg episode reward: [(0, '26.599')] -[2024-09-01 08:54:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6979584. Throughput: 0: 215.8. Samples: 1361242. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:54:27,156][00307] Avg episode reward: [(0, '26.067')] -[2024-09-01 08:54:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6983680. Throughput: 0: 219.4. Samples: 1362876. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:54:32,161][00307] Avg episode reward: [(0, '26.048')] -[2024-09-01 08:54:36,067][16881] Updated weights for policy 0, policy_version 1706 (0.2263) -[2024-09-01 08:54:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6987776. Throughput: 0: 213.8. Samples: 1363220. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:37,161][00307] Avg episode reward: [(0, '26.048')] -[2024-09-01 08:54:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 6991872. Throughput: 0: 210.0. Samples: 1364512. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:42,158][00307] Avg episode reward: [(0, '25.963')] -[2024-09-01 08:54:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 6995968. Throughput: 0: 228.0. Samples: 1366402. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:47,163][00307] Avg episode reward: [(0, '26.387')] -[2024-09-01 08:54:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 7000064. Throughput: 0: 215.2. Samples: 1366808. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:52,158][00307] Avg episode reward: [(0, '25.977')] -[2024-09-01 08:54:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7004160. Throughput: 0: 214.0. Samples: 1367854. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:54:57,157][00307] Avg episode reward: [(0, '25.929')] -[2024-09-01 08:55:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7008256. Throughput: 0: 227.6. Samples: 1369468. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:55:02,167][00307] Avg episode reward: [(0, '26.984')] -[2024-09-01 08:55:07,166][00307] Fps is (10 sec: 1227.3, 60 sec: 887.3, 300 sec: 902.5). Total num frames: 7016448. Throughput: 0: 229.4. Samples: 1370230. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:07,172][00307] Avg episode reward: [(0, '26.808')] -[2024-09-01 08:55:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7016448. Throughput: 0: 227.5. Samples: 1371478. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:12,159][00307] Avg episode reward: [(0, '26.495')] -[2024-09-01 08:55:17,154][00307] Fps is (10 sec: 820.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7024640. Throughput: 0: 216.1. Samples: 1372602. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:17,158][00307] Avg episode reward: [(0, '26.495')] -[2024-09-01 08:55:20,772][16881] Updated weights for policy 0, policy_version 1716 (0.1042) -[2024-09-01 08:55:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7028736. Throughput: 0: 230.8. Samples: 1373608. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:22,157][00307] Avg episode reward: [(0, '25.692')] -[2024-09-01 08:55:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7032832. Throughput: 0: 228.8. Samples: 1374808. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:27,157][00307] Avg episode reward: [(0, '26.076')] -[2024-09-01 08:55:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7036928. Throughput: 0: 214.1. Samples: 1376036. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:32,162][00307] Avg episode reward: [(0, '27.154')] -[2024-09-01 08:55:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7041024. Throughput: 0: 222.6. Samples: 1376824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:55:37,157][00307] Avg episode reward: [(0, '27.684')] -[2024-09-01 08:55:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7045120. Throughput: 0: 235.2. Samples: 1378440. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:55:42,158][00307] Avg episode reward: [(0, '27.665')] -[2024-09-01 08:55:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7049216. Throughput: 0: 224.2. Samples: 1379556. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:55:47,158][00307] Avg episode reward: [(0, '27.622')] -[2024-09-01 08:55:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7053312. Throughput: 0: 217.5. Samples: 1380014. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:55:52,167][00307] Avg episode reward: [(0, '27.622')] -[2024-09-01 08:55:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7061504. Throughput: 0: 229.2. Samples: 1381792. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:55:57,157][00307] Avg episode reward: [(0, '27.763')] -[2024-09-01 08:56:02,122][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001725_7065600.pth... -[2024-09-01 08:56:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7065600. Throughput: 0: 226.3. Samples: 1382786. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:56:02,158][00307] Avg episode reward: [(0, '27.520')] -[2024-09-01 08:56:02,311][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001672_6848512.pth -[2024-09-01 08:56:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.4, 300 sec: 888.6). Total num frames: 7065600. Throughput: 0: 218.2. Samples: 1383428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:56:07,159][00307] Avg episode reward: [(0, '27.139')] -[2024-09-01 08:56:07,599][16881] Updated weights for policy 0, policy_version 1726 (0.0540) -[2024-09-01 08:56:09,967][16868] Signal inference workers to stop experience collection... (1350 times) -[2024-09-01 08:56:10,021][16881] InferenceWorker_p0-w0: stopping experience collection (1350 times) -[2024-09-01 08:56:11,437][16868] Signal inference workers to resume experience collection... (1350 times) -[2024-09-01 08:56:11,439][16881] InferenceWorker_p0-w0: resuming experience collection (1350 times) -[2024-09-01 08:56:12,156][00307] Fps is (10 sec: 819.1, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7073792. Throughput: 0: 224.1. Samples: 1384894. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:56:12,158][00307] Avg episode reward: [(0, '27.054')] -[2024-09-01 08:56:17,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7077888. Throughput: 0: 232.9. Samples: 1386516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:56:17,159][00307] Avg episode reward: [(0, '27.010')] -[2024-09-01 08:56:22,158][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7081984. Throughput: 0: 223.9. Samples: 1386902. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:56:22,169][00307] Avg episode reward: [(0, '27.010')] -[2024-09-01 08:56:27,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7086080. Throughput: 0: 212.5. Samples: 1388002. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:56:27,156][00307] Avg episode reward: [(0, '26.729')] -[2024-09-01 08:56:32,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7090176. Throughput: 0: 230.2. Samples: 1389914. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:56:32,156][00307] Avg episode reward: [(0, '27.519')] -[2024-09-01 08:56:37,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7094272. Throughput: 0: 228.6. Samples: 1390302. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:56:37,159][00307] Avg episode reward: [(0, '27.665')] -[2024-09-01 08:56:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7098368. Throughput: 0: 214.0. Samples: 1391420. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 08:56:42,159][00307] Avg episode reward: [(0, '28.154')] -[2024-09-01 08:56:47,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7102464. Throughput: 0: 225.4. Samples: 1392928. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:56:47,162][00307] Avg episode reward: [(0, '27.764')] -[2024-09-01 08:56:51,672][16881] Updated weights for policy 0, policy_version 1736 (0.2016) -[2024-09-01 08:56:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7110656. Throughput: 0: 227.1. Samples: 1393646. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:56:52,157][00307] Avg episode reward: [(0, '28.280')] -[2024-09-01 08:56:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7110656. Throughput: 0: 222.7. Samples: 1394914. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:56:57,158][00307] Avg episode reward: [(0, '28.309')] -[2024-09-01 08:57:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7114752. Throughput: 0: 210.5. Samples: 1395990. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:57:02,168][00307] Avg episode reward: [(0, '28.352')] -[2024-09-01 08:57:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7122944. Throughput: 0: 224.0. Samples: 1396980. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:07,163][00307] Avg episode reward: [(0, '28.280')] -[2024-09-01 08:57:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7127040. Throughput: 0: 228.9. Samples: 1398302. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:57:12,157][00307] Avg episode reward: [(0, '28.589')] -[2024-09-01 08:57:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7131136. Throughput: 0: 208.5. Samples: 1399296. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:57:17,160][00307] Avg episode reward: [(0, '28.584')] -[2024-09-01 08:57:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7135232. Throughput: 0: 216.3. Samples: 1400036. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:22,158][00307] Avg episode reward: [(0, '28.286')] -[2024-09-01 08:57:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7139328. Throughput: 0: 228.8. Samples: 1401714. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:27,156][00307] Avg episode reward: [(0, '28.328')] -[2024-09-01 08:57:32,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7143424. Throughput: 0: 221.9. Samples: 1402916. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:32,160][00307] Avg episode reward: [(0, '28.427')] -[2024-09-01 08:57:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7147520. Throughput: 0: 213.5. Samples: 1403254. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:37,156][00307] Avg episode reward: [(0, '28.672')] -[2024-09-01 08:57:38,974][16881] Updated weights for policy 0, policy_version 1746 (0.2042) -[2024-09-01 08:57:42,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7151616. Throughput: 0: 223.6. Samples: 1404976. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:42,167][00307] Avg episode reward: [(0, '28.149')] -[2024-09-01 08:57:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7159808. Throughput: 0: 227.3. Samples: 1406220. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:47,159][00307] Avg episode reward: [(0, '28.237')] -[2024-09-01 08:57:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7159808. Throughput: 0: 221.3. Samples: 1406938. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:52,157][00307] Avg episode reward: [(0, '28.212')] -[2024-09-01 08:57:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7168000. Throughput: 0: 220.9. Samples: 1408244. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:57:57,160][00307] Avg episode reward: [(0, '28.365')] -[2024-09-01 08:58:00,589][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001751_7172096.pth... -[2024-09-01 08:58:00,704][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001699_6959104.pth -[2024-09-01 08:58:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7172096. Throughput: 0: 236.0. Samples: 1409918. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:58:02,157][00307] Avg episode reward: [(0, '28.057')] -[2024-09-01 08:58:07,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7176192. Throughput: 0: 228.2. Samples: 1410308. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:58:07,163][00307] Avg episode reward: [(0, '27.834')] -[2024-09-01 08:58:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7180288. Throughput: 0: 214.3. Samples: 1411358. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:58:12,157][00307] Avg episode reward: [(0, '27.161')] -[2024-09-01 08:58:17,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7184384. Throughput: 0: 226.0. Samples: 1413086. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:58:17,157][00307] Avg episode reward: [(0, '26.800')] -[2024-09-01 08:58:22,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7188480. Throughput: 0: 236.2. Samples: 1413882. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:58:22,160][00307] Avg episode reward: [(0, '26.843')] -[2024-09-01 08:58:24,599][16881] Updated weights for policy 0, policy_version 1756 (0.1713) -[2024-09-01 08:58:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7192576. Throughput: 0: 216.8. Samples: 1414730. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:58:27,163][00307] Avg episode reward: [(0, '26.549')] -[2024-09-01 08:58:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7196672. Throughput: 0: 230.6. Samples: 1416598. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 08:58:32,157][00307] Avg episode reward: [(0, '26.857')] -[2024-09-01 08:58:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7204864. Throughput: 0: 227.1. Samples: 1417158. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:58:37,164][00307] Avg episode reward: [(0, '26.644')] -[2024-09-01 08:58:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7204864. Throughput: 0: 226.2. Samples: 1418422. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:58:42,157][00307] Avg episode reward: [(0, '27.390')] -[2024-09-01 08:58:47,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 7208960. Throughput: 0: 215.2. Samples: 1419604. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:58:47,157][00307] Avg episode reward: [(0, '27.063')] -[2024-09-01 08:58:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 7217152. Throughput: 0: 224.6. Samples: 1420412. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:58:52,158][00307] Avg episode reward: [(0, '26.683')] -[2024-09-01 08:58:57,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7221248. Throughput: 0: 228.7. Samples: 1421648. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 08:58:57,162][00307] Avg episode reward: [(0, '25.799')] -[2024-09-01 08:59:02,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7225344. Throughput: 0: 215.7. Samples: 1422794. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:02,159][00307] Avg episode reward: [(0, '25.564')] -[2024-09-01 08:59:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 7229440. Throughput: 0: 217.2. Samples: 1423656. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:07,156][00307] Avg episode reward: [(0, '25.604')] -[2024-09-01 08:59:10,510][16881] Updated weights for policy 0, policy_version 1766 (0.3771) -[2024-09-01 08:59:12,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7233536. Throughput: 0: 225.6. Samples: 1424882. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:12,164][00307] Avg episode reward: [(0, '25.956')] -[2024-09-01 08:59:17,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7237632. Throughput: 0: 216.6. Samples: 1426346. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:17,166][00307] Avg episode reward: [(0, '25.956')] -[2024-09-01 08:59:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7241728. Throughput: 0: 212.4. Samples: 1426714. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:22,161][00307] Avg episode reward: [(0, '25.434')] -[2024-09-01 08:59:27,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7245824. Throughput: 0: 217.0. Samples: 1428188. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:27,164][00307] Avg episode reward: [(0, '26.192')] -[2024-09-01 08:59:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7249920. Throughput: 0: 225.6. Samples: 1429756. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:32,157][00307] Avg episode reward: [(0, '26.156')] -[2024-09-01 08:59:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7254016. Throughput: 0: 217.4. Samples: 1430194. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:37,158][00307] Avg episode reward: [(0, '26.220')] -[2024-09-01 08:59:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7258112. Throughput: 0: 222.7. Samples: 1431670. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:42,157][00307] Avg episode reward: [(0, '26.014')] -[2024-09-01 08:59:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7266304. Throughput: 0: 227.0. Samples: 1433008. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:47,157][00307] Avg episode reward: [(0, '25.112')] -[2024-09-01 08:59:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7270400. Throughput: 0: 228.4. Samples: 1433934. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:52,160][00307] Avg episode reward: [(0, '25.232')] -[2024-09-01 08:59:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7270400. Throughput: 0: 223.6. Samples: 1434946. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 08:59:57,163][00307] Avg episode reward: [(0, '24.851')] -[2024-09-01 08:59:57,188][16881] Updated weights for policy 0, policy_version 1776 (0.0549) -[2024-09-01 08:59:59,609][16868] Signal inference workers to stop experience collection... (1400 times) -[2024-09-01 08:59:59,662][16881] InferenceWorker_p0-w0: stopping experience collection (1400 times) -[2024-09-01 09:00:01,139][16868] Signal inference workers to resume experience collection... (1400 times) -[2024-09-01 09:00:01,141][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001777_7278592.pth... -[2024-09-01 09:00:01,141][16881] InferenceWorker_p0-w0: resuming experience collection (1400 times) -[2024-09-01 09:00:01,252][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001725_7065600.pth -[2024-09-01 09:00:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.7). Total num frames: 7278592. Throughput: 0: 221.9. Samples: 1436330. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:02,157][00307] Avg episode reward: [(0, '24.996')] -[2024-09-01 09:00:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7282688. Throughput: 0: 228.8. Samples: 1437010. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:07,156][00307] Avg episode reward: [(0, '25.110')] -[2024-09-01 09:00:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7286784. Throughput: 0: 220.4. Samples: 1438108. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:12,159][00307] Avg episode reward: [(0, '25.498')] -[2024-09-01 09:00:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7290880. Throughput: 0: 214.8. Samples: 1439420. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:17,156][00307] Avg episode reward: [(0, '25.792')] -[2024-09-01 09:00:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7294976. Throughput: 0: 223.3. Samples: 1440244. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:22,162][00307] Avg episode reward: [(0, '25.983')] -[2024-09-01 09:00:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7299072. Throughput: 0: 224.8. Samples: 1441786. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 09:00:27,157][00307] Avg episode reward: [(0, '26.296')] -[2024-09-01 09:00:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7303168. Throughput: 0: 219.1. Samples: 1442866. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 09:00:32,157][00307] Avg episode reward: [(0, '26.296')] -[2024-09-01 09:00:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7307264. Throughput: 0: 214.3. Samples: 1443576. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 09:00:37,160][00307] Avg episode reward: [(0, '26.375')] -[2024-09-01 09:00:41,720][16881] Updated weights for policy 0, policy_version 1786 (0.1036) -[2024-09-01 09:00:42,157][00307] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7315456. Throughput: 0: 227.6. Samples: 1445190. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 09:00:42,165][00307] Avg episode reward: [(0, '27.108')] -[2024-09-01 09:00:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7315456. Throughput: 0: 219.8. Samples: 1446220. Policy #0 lag: (min: 1.0, avg: 1.2, max: 2.0) -[2024-09-01 09:00:47,158][00307] Avg episode reward: [(0, '27.378')] -[2024-09-01 09:00:52,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7323648. Throughput: 0: 220.1. Samples: 1446916. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:52,163][00307] Avg episode reward: [(0, '27.092')] -[2024-09-01 09:00:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 7327744. Throughput: 0: 225.8. Samples: 1448268. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:00:57,156][00307] Avg episode reward: [(0, '27.217')] -[2024-09-01 09:01:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7331840. Throughput: 0: 228.6. Samples: 1449706. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:02,161][00307] Avg episode reward: [(0, '27.254')] -[2024-09-01 09:01:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7335936. Throughput: 0: 222.8. Samples: 1450272. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:07,161][00307] Avg episode reward: [(0, '27.254')] -[2024-09-01 09:01:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7340032. Throughput: 0: 218.4. Samples: 1451616. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:12,157][00307] Avg episode reward: [(0, '27.351')] -[2024-09-01 09:01:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7344128. Throughput: 0: 232.3. Samples: 1453318. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:17,162][00307] Avg episode reward: [(0, '26.860')] -[2024-09-01 09:01:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7348224. Throughput: 0: 226.2. Samples: 1453754. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:22,158][00307] Avg episode reward: [(0, '26.861')] -[2024-09-01 09:01:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7352320. Throughput: 0: 217.9. Samples: 1454996. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:27,163][00307] Avg episode reward: [(0, '27.055')] -[2024-09-01 09:01:28,319][16881] Updated weights for policy 0, policy_version 1796 (0.0568) -[2024-09-01 09:01:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7356416. Throughput: 0: 226.8. Samples: 1456428. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:32,162][00307] Avg episode reward: [(0, '27.115')] -[2024-09-01 09:01:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7364608. Throughput: 0: 234.3. Samples: 1457458. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:37,159][00307] Avg episode reward: [(0, '26.514')] -[2024-09-01 09:01:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7364608. Throughput: 0: 225.0. Samples: 1458392. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:42,163][00307] Avg episode reward: [(0, '26.325')] -[2024-09-01 09:01:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 7372800. Throughput: 0: 207.0. Samples: 1459022. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:47,164][00307] Avg episode reward: [(0, '26.135')] -[2024-09-01 09:01:52,161][00307] Fps is (10 sec: 1227.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7376896. Throughput: 0: 228.9. Samples: 1460574. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:01:52,164][00307] Avg episode reward: [(0, '26.177')] -[2024-09-01 09:01:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7380992. Throughput: 0: 224.6. Samples: 1461722. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:01:57,159][00307] Avg episode reward: [(0, '26.506')] -[2024-09-01 09:02:01,117][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001803_7385088.pth... -[2024-09-01 09:02:01,232][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001751_7172096.pth -[2024-09-01 09:02:02,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7385088. Throughput: 0: 216.5. Samples: 1463062. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:02:02,161][00307] Avg episode reward: [(0, '27.138')] -[2024-09-01 09:02:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7389184. Throughput: 0: 221.0. Samples: 1463700. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:02:07,157][00307] Avg episode reward: [(0, '27.625')] -[2024-09-01 09:02:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7393280. Throughput: 0: 231.1. Samples: 1465394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:02:12,168][00307] Avg episode reward: [(0, '27.327')] -[2024-09-01 09:02:14,467][16881] Updated weights for policy 0, policy_version 1806 (0.1045) -[2024-09-01 09:02:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7397376. Throughput: 0: 221.5. Samples: 1466394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:02:17,159][00307] Avg episode reward: [(0, '27.252')] -[2024-09-01 09:02:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7401472. Throughput: 0: 212.9. Samples: 1467040. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:02:22,168][00307] Avg episode reward: [(0, '26.548')] -[2024-09-01 09:02:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7405568. Throughput: 0: 233.6. Samples: 1468904. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:27,167][00307] Avg episode reward: [(0, '26.964')] -[2024-09-01 09:02:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7409664. Throughput: 0: 242.4. Samples: 1469928. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:32,157][00307] Avg episode reward: [(0, '27.665')] -[2024-09-01 09:02:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7413760. Throughput: 0: 219.1. Samples: 1470432. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:37,164][00307] Avg episode reward: [(0, '27.284')] -[2024-09-01 09:02:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 7421952. Throughput: 0: 227.8. Samples: 1471974. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:42,167][00307] Avg episode reward: [(0, '27.515')] -[2024-09-01 09:02:47,154][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7426048. Throughput: 0: 227.8. Samples: 1473314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:47,157][00307] Avg episode reward: [(0, '27.056')] -[2024-09-01 09:02:52,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7430144. Throughput: 0: 228.8. Samples: 1473996. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:52,163][00307] Avg episode reward: [(0, '27.244')] -[2024-09-01 09:02:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7434240. Throughput: 0: 214.1. Samples: 1475028. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:02:57,167][00307] Avg episode reward: [(0, '26.958')] -[2024-09-01 09:03:00,710][16881] Updated weights for policy 0, policy_version 1816 (0.1095) -[2024-09-01 09:03:02,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7438336. Throughput: 0: 227.4. Samples: 1476626. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:02,157][00307] Avg episode reward: [(0, '26.905')] -[2024-09-01 09:03:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7442432. Throughput: 0: 223.6. Samples: 1477102. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:07,160][00307] Avg episode reward: [(0, '26.336')] -[2024-09-01 09:03:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7446528. Throughput: 0: 208.0. Samples: 1478264. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:03:12,159][00307] Avg episode reward: [(0, '27.230')] -[2024-09-01 09:03:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7450624. Throughput: 0: 225.5. Samples: 1480074. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:03:17,166][00307] Avg episode reward: [(0, '26.778')] -[2024-09-01 09:03:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7454720. Throughput: 0: 225.6. Samples: 1480584. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:22,157][00307] Avg episode reward: [(0, '26.795')] -[2024-09-01 09:03:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7458816. Throughput: 0: 216.9. Samples: 1481736. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:27,159][00307] Avg episode reward: [(0, '26.971')] -[2024-09-01 09:03:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 7462912. Throughput: 0: 220.4. Samples: 1483230. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:32,163][00307] Avg episode reward: [(0, '26.594')] -[2024-09-01 09:03:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7471104. Throughput: 0: 225.5. Samples: 1484144. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:37,157][00307] Avg episode reward: [(0, '26.801')] -[2024-09-01 09:03:42,155][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7475200. Throughput: 0: 227.6. Samples: 1485270. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:03:42,158][00307] Avg episode reward: [(0, '26.828')] -[2024-09-01 09:03:47,139][16881] Updated weights for policy 0, policy_version 1826 (0.1950) -[2024-09-01 09:03:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7479296. Throughput: 0: 214.8. Samples: 1486290. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:03:47,157][00307] Avg episode reward: [(0, '27.118')] -[2024-09-01 09:03:49,452][16868] Signal inference workers to stop experience collection... (1450 times) -[2024-09-01 09:03:49,488][16881] InferenceWorker_p0-w0: stopping experience collection (1450 times) -[2024-09-01 09:03:50,938][16868] Signal inference workers to resume experience collection... (1450 times) -[2024-09-01 09:03:50,939][16881] InferenceWorker_p0-w0: resuming experience collection (1450 times) -[2024-09-01 09:03:52,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 7483392. Throughput: 0: 225.9. Samples: 1487266. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:03:52,157][00307] Avg episode reward: [(0, '28.327')] -[2024-09-01 09:03:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7487488. Throughput: 0: 233.7. Samples: 1488780. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:03:57,163][00307] Avg episode reward: [(0, '28.906')] -[2024-09-01 09:04:00,052][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001829_7491584.pth... -[2024-09-01 09:04:00,147][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001777_7278592.pth -[2024-09-01 09:04:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7491584. Throughput: 0: 215.4. Samples: 1489766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:04:02,161][00307] Avg episode reward: [(0, '28.617')] -[2024-09-01 09:04:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7495680. Throughput: 0: 217.6. Samples: 1490378. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:04:07,156][00307] Avg episode reward: [(0, '28.287')] -[2024-09-01 09:04:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7499776. Throughput: 0: 224.2. Samples: 1491824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:04:12,159][00307] Avg episode reward: [(0, '28.442')] -[2024-09-01 09:04:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7503872. Throughput: 0: 226.4. Samples: 1493420. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:17,157][00307] Avg episode reward: [(0, '27.580')] -[2024-09-01 09:04:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7507968. Throughput: 0: 212.4. Samples: 1493704. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:22,162][00307] Avg episode reward: [(0, '27.943')] -[2024-09-01 09:04:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7512064. Throughput: 0: 222.9. Samples: 1495300. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:27,165][00307] Avg episode reward: [(0, '28.267')] -[2024-09-01 09:04:31,665][16881] Updated weights for policy 0, policy_version 1836 (0.1563) -[2024-09-01 09:04:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7520256. Throughput: 0: 229.2. Samples: 1496606. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:32,162][00307] Avg episode reward: [(0, '29.150')] -[2024-09-01 09:04:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7520256. Throughput: 0: 225.6. Samples: 1497418. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:37,160][00307] Avg episode reward: [(0, '29.304')] -[2024-09-01 09:04:42,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 7524352. Throughput: 0: 215.5. Samples: 1498476. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:42,162][00307] Avg episode reward: [(0, '29.508')] -[2024-09-01 09:04:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7532544. Throughput: 0: 226.0. Samples: 1499938. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:47,161][00307] Avg episode reward: [(0, '28.538')] -[2024-09-01 09:04:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7536640. Throughput: 0: 227.1. Samples: 1500598. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:52,161][00307] Avg episode reward: [(0, '28.191')] -[2024-09-01 09:04:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7540736. Throughput: 0: 220.3. Samples: 1501736. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:04:57,158][00307] Avg episode reward: [(0, '27.898')] -[2024-09-01 09:05:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7544832. Throughput: 0: 216.8. Samples: 1503178. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:05:02,158][00307] Avg episode reward: [(0, '27.084')] -[2024-09-01 09:05:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7548928. Throughput: 0: 225.4. Samples: 1503848. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:05:07,157][00307] Avg episode reward: [(0, '27.087')] -[2024-09-01 09:05:12,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7553024. Throughput: 0: 218.5. Samples: 1505132. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:05:12,161][00307] Avg episode reward: [(0, '26.975')] -[2024-09-01 09:05:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7557120. Throughput: 0: 217.3. Samples: 1506384. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:05:17,157][00307] Avg episode reward: [(0, '26.877')] -[2024-09-01 09:05:19,401][16881] Updated weights for policy 0, policy_version 1846 (0.1598) -[2024-09-01 09:05:22,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7561216. Throughput: 0: 211.8. Samples: 1506948. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:05:22,166][00307] Avg episode reward: [(0, '26.612')] -[2024-09-01 09:05:27,157][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7569408. Throughput: 0: 227.4. Samples: 1508708. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:05:27,160][00307] Avg episode reward: [(0, '26.245')] -[2024-09-01 09:05:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7569408. Throughput: 0: 219.1. Samples: 1509798. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:05:32,157][00307] Avg episode reward: [(0, '26.606')] -[2024-09-01 09:05:37,154][00307] Fps is (10 sec: 409.7, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 7573504. Throughput: 0: 217.7. Samples: 1510396. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:05:37,156][00307] Avg episode reward: [(0, '27.396')] -[2024-09-01 09:05:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7581696. Throughput: 0: 223.9. Samples: 1511810. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:05:42,157][00307] Avg episode reward: [(0, '28.132')] -[2024-09-01 09:05:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7585792. Throughput: 0: 221.2. Samples: 1513134. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:05:47,162][00307] Avg episode reward: [(0, '28.110')] -[2024-09-01 09:05:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7589888. Throughput: 0: 222.3. Samples: 1513850. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:05:52,159][00307] Avg episode reward: [(0, '28.199')] -[2024-09-01 09:05:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7593984. Throughput: 0: 223.0. Samples: 1515166. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:05:57,160][00307] Avg episode reward: [(0, '27.623')] -[2024-09-01 09:05:59,166][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001855_7598080.pth... -[2024-09-01 09:05:59,280][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001803_7385088.pth -[2024-09-01 09:06:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7598080. Throughput: 0: 233.9. Samples: 1516910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:06:02,161][00307] Avg episode reward: [(0, '26.823')] -[2024-09-01 09:06:03,778][16881] Updated weights for policy 0, policy_version 1856 (0.1240) -[2024-09-01 09:06:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7602176. Throughput: 0: 228.1. Samples: 1517214. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:06:07,160][00307] Avg episode reward: [(0, '26.494')] -[2024-09-01 09:06:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7606272. Throughput: 0: 216.0. Samples: 1518426. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:06:12,160][00307] Avg episode reward: [(0, '27.038')] -[2024-09-01 09:06:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7610368. Throughput: 0: 225.4. Samples: 1519940. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:06:17,156][00307] Avg episode reward: [(0, '27.062')] -[2024-09-01 09:06:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7614464. Throughput: 0: 233.4. Samples: 1520898. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:06:22,157][00307] Avg episode reward: [(0, '27.193')] -[2024-09-01 09:06:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7618560. Throughput: 0: 218.4. Samples: 1521640. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) -[2024-09-01 09:06:27,163][00307] Avg episode reward: [(0, '26.920')] -[2024-09-01 09:06:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 7626752. Throughput: 0: 220.6. Samples: 1523062. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:06:32,157][00307] Avg episode reward: [(0, '26.273')] -[2024-09-01 09:06:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7630848. Throughput: 0: 226.5. Samples: 1524044. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:06:37,165][00307] Avg episode reward: [(0, '25.687')] -[2024-09-01 09:06:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7634944. Throughput: 0: 220.5. Samples: 1525090. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:06:42,164][00307] Avg episode reward: [(0, '24.963')] -[2024-09-01 09:06:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7639040. Throughput: 0: 211.8. Samples: 1526442. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:06:47,157][00307] Avg episode reward: [(0, '24.715')] -[2024-09-01 09:06:50,483][16881] Updated weights for policy 0, policy_version 1866 (0.1715) -[2024-09-01 09:06:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7643136. Throughput: 0: 219.6. Samples: 1527098. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:06:52,157][00307] Avg episode reward: [(0, '25.696')] -[2024-09-01 09:06:57,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7647232. Throughput: 0: 231.8. Samples: 1528858. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:06:57,163][00307] Avg episode reward: [(0, '25.622')] -[2024-09-01 09:07:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7651328. Throughput: 0: 219.9. Samples: 1529834. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:07:02,157][00307] Avg episode reward: [(0, '25.602')] -[2024-09-01 09:07:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7655424. Throughput: 0: 211.1. Samples: 1530398. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:07:07,156][00307] Avg episode reward: [(0, '26.250')] -[2024-09-01 09:07:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7659520. Throughput: 0: 236.0. Samples: 1532258. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:07:12,158][00307] Avg episode reward: [(0, '26.477')] -[2024-09-01 09:07:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7663616. Throughput: 0: 228.1. Samples: 1533326. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:07:17,157][00307] Avg episode reward: [(0, '26.463')] -[2024-09-01 09:07:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7667712. Throughput: 0: 217.2. Samples: 1533816. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:07:22,157][00307] Avg episode reward: [(0, '26.482')] -[2024-09-01 09:07:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7675904. Throughput: 0: 228.8. Samples: 1535384. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:07:27,157][00307] Avg episode reward: [(0, '26.416')] -[2024-09-01 09:07:32,167][00307] Fps is (10 sec: 1227.2, 60 sec: 887.3, 300 sec: 902.5). Total num frames: 7680000. Throughput: 0: 230.9. Samples: 1536834. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:07:32,178][00307] Avg episode reward: [(0, '25.851')] -[2024-09-01 09:07:36,158][16881] Updated weights for policy 0, policy_version 1876 (0.0541) -[2024-09-01 09:07:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7684096. Throughput: 0: 229.0. Samples: 1537404. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:07:37,156][00307] Avg episode reward: [(0, '25.343')] -[2024-09-01 09:07:39,702][16868] Signal inference workers to stop experience collection... (1500 times) -[2024-09-01 09:07:39,753][16881] InferenceWorker_p0-w0: stopping experience collection (1500 times) -[2024-09-01 09:07:40,935][16868] Signal inference workers to resume experience collection... (1500 times) -[2024-09-01 09:07:40,936][16881] InferenceWorker_p0-w0: resuming experience collection (1500 times) -[2024-09-01 09:07:42,154][00307] Fps is (10 sec: 820.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7688192. Throughput: 0: 214.1. Samples: 1538492. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:07:42,157][00307] Avg episode reward: [(0, '25.635')] -[2024-09-01 09:07:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7692288. Throughput: 0: 234.0. Samples: 1540362. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:07:47,157][00307] Avg episode reward: [(0, '24.844')] -[2024-09-01 09:07:52,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7696384. Throughput: 0: 227.5. Samples: 1540636. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:07:52,161][00307] Avg episode reward: [(0, '25.119')] -[2024-09-01 09:07:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7700480. Throughput: 0: 211.7. Samples: 1541786. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:07:57,157][00307] Avg episode reward: [(0, '25.694')] -[2024-09-01 09:07:59,053][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001881_7704576.pth... -[2024-09-01 09:07:59,165][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001829_7491584.pth -[2024-09-01 09:08:02,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7704576. Throughput: 0: 225.9. Samples: 1543490. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:08:02,166][00307] Avg episode reward: [(0, '25.838')] -[2024-09-01 09:08:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7708672. Throughput: 0: 232.2. Samples: 1544264. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) -[2024-09-01 09:08:07,164][00307] Avg episode reward: [(0, '25.511')] -[2024-09-01 09:08:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7712768. Throughput: 0: 222.1. Samples: 1545378. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) -[2024-09-01 09:08:12,159][00307] Avg episode reward: [(0, '24.923')] -[2024-09-01 09:08:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7716864. Throughput: 0: 216.7. Samples: 1546582. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:17,157][00307] Avg episode reward: [(0, '24.844')] -[2024-09-01 09:08:21,215][16881] Updated weights for policy 0, policy_version 1886 (0.0700) -[2024-09-01 09:08:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7725056. Throughput: 0: 225.8. Samples: 1547566. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:22,157][00307] Avg episode reward: [(0, '24.777')] -[2024-09-01 09:08:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7729152. Throughput: 0: 226.6. Samples: 1548690. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:27,159][00307] Avg episode reward: [(0, '24.972')] -[2024-09-01 09:08:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.7, 300 sec: 888.6). Total num frames: 7733248. Throughput: 0: 206.7. Samples: 1549662. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:32,158][00307] Avg episode reward: [(0, '25.044')] -[2024-09-01 09:08:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7737344. Throughput: 0: 223.0. Samples: 1550670. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:37,161][00307] Avg episode reward: [(0, '26.284')] -[2024-09-01 09:08:42,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7741440. Throughput: 0: 228.8. Samples: 1552082. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:42,163][00307] Avg episode reward: [(0, '26.082')] -[2024-09-01 09:08:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7745536. Throughput: 0: 215.3. Samples: 1553180. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:47,160][00307] Avg episode reward: [(0, '26.223')] -[2024-09-01 09:08:52,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7749632. Throughput: 0: 210.8. Samples: 1553748. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:52,160][00307] Avg episode reward: [(0, '26.546')] -[2024-09-01 09:08:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7753728. Throughput: 0: 225.9. Samples: 1555544. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:08:57,156][00307] Avg episode reward: [(0, '25.767')] -[2024-09-01 09:09:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7757824. Throughput: 0: 229.0. Samples: 1556888. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:09:02,160][00307] Avg episode reward: [(0, '25.651')] -[2024-09-01 09:09:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7761920. Throughput: 0: 214.3. Samples: 1557208. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:09:07,158][00307] Avg episode reward: [(0, '25.469')] -[2024-09-01 09:09:08,890][16881] Updated weights for policy 0, policy_version 1896 (0.1040) -[2024-09-01 09:09:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7766016. Throughput: 0: 225.2. Samples: 1558824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:12,157][00307] Avg episode reward: [(0, '25.582')] -[2024-09-01 09:09:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7774208. Throughput: 0: 232.6. Samples: 1560128. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:17,160][00307] Avg episode reward: [(0, '26.049')] -[2024-09-01 09:09:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7774208. Throughput: 0: 226.3. Samples: 1560852. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:22,157][00307] Avg episode reward: [(0, '26.621')] -[2024-09-01 09:09:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 7778304. Throughput: 0: 220.3. Samples: 1561996. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:27,166][00307] Avg episode reward: [(0, '26.616')] -[2024-09-01 09:09:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7786496. Throughput: 0: 228.3. Samples: 1563454. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:32,158][00307] Avg episode reward: [(0, '26.513')] -[2024-09-01 09:09:37,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7790592. Throughput: 0: 229.2. Samples: 1564064. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:09:37,159][00307] Avg episode reward: [(0, '27.432')] -[2024-09-01 09:09:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7794688. Throughput: 0: 213.3. Samples: 1565142. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:09:42,163][00307] Avg episode reward: [(0, '27.353')] -[2024-09-01 09:09:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7798784. Throughput: 0: 215.3. Samples: 1566576. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:47,162][00307] Avg episode reward: [(0, '27.374')] -[2024-09-01 09:09:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7802880. Throughput: 0: 222.5. Samples: 1567220. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:52,157][00307] Avg episode reward: [(0, '26.321')] -[2024-09-01 09:09:53,290][16881] Updated weights for policy 0, policy_version 1906 (0.1046) -[2024-09-01 09:09:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7806976. Throughput: 0: 221.1. Samples: 1568774. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:09:57,157][00307] Avg episode reward: [(0, '26.958')] -[2024-09-01 09:09:59,137][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001907_7811072.pth... -[2024-09-01 09:09:59,220][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001855_7598080.pth -[2024-09-01 09:10:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7811072. Throughput: 0: 219.2. Samples: 1569992. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:10:02,157][00307] Avg episode reward: [(0, '27.526')] -[2024-09-01 09:10:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7815168. Throughput: 0: 212.7. Samples: 1570424. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:10:07,157][00307] Avg episode reward: [(0, '27.403')] -[2024-09-01 09:10:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7823360. Throughput: 0: 227.1. Samples: 1572216. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:12,162][00307] Avg episode reward: [(0, '27.790')] -[2024-09-01 09:10:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7823360. Throughput: 0: 217.3. Samples: 1573232. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:17,157][00307] Avg episode reward: [(0, '28.047')] -[2024-09-01 09:10:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 7827456. Throughput: 0: 212.6. Samples: 1573630. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:22,158][00307] Avg episode reward: [(0, '28.669')] -[2024-09-01 09:10:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7835648. Throughput: 0: 225.8. Samples: 1575304. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:27,160][00307] Avg episode reward: [(0, '28.844')] -[2024-09-01 09:10:32,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 7839744. Throughput: 0: 222.7. Samples: 1576596. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:32,158][00307] Avg episode reward: [(0, '28.761')] -[2024-09-01 09:10:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7843840. Throughput: 0: 223.0. Samples: 1577256. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:37,157][00307] Avg episode reward: [(0, '28.742')] -[2024-09-01 09:10:40,576][16881] Updated weights for policy 0, policy_version 1916 (0.1183) -[2024-09-01 09:10:42,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7847936. Throughput: 0: 217.8. Samples: 1578576. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:42,157][00307] Avg episode reward: [(0, '28.022')] -[2024-09-01 09:10:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7852032. Throughput: 0: 231.3. Samples: 1580402. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:47,162][00307] Avg episode reward: [(0, '27.570')] -[2024-09-01 09:10:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7856128. Throughput: 0: 223.6. Samples: 1580486. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:52,161][00307] Avg episode reward: [(0, '27.570')] -[2024-09-01 09:10:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7860224. Throughput: 0: 218.5. Samples: 1582050. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:10:57,161][00307] Avg episode reward: [(0, '27.497')] -[2024-09-01 09:11:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7864320. Throughput: 0: 228.0. Samples: 1583494. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:11:02,167][00307] Avg episode reward: [(0, '27.704')] -[2024-09-01 09:11:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7868416. Throughput: 0: 239.8. Samples: 1584420. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:11:07,159][00307] Avg episode reward: [(0, '27.704')] -[2024-09-01 09:11:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 7872512. Throughput: 0: 224.4. Samples: 1585400. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:11:12,160][00307] Avg episode reward: [(0, '27.741')] -[2024-09-01 09:11:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7880704. Throughput: 0: 225.1. Samples: 1586724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:17,157][00307] Avg episode reward: [(0, '27.940')] -[2024-09-01 09:11:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7884800. Throughput: 0: 232.4. Samples: 1587716. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:22,160][00307] Avg episode reward: [(0, '28.279')] -[2024-09-01 09:11:26,255][16881] Updated weights for policy 0, policy_version 1926 (0.0709) -[2024-09-01 09:11:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7888896. Throughput: 0: 225.7. Samples: 1588732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:27,160][00307] Avg episode reward: [(0, '29.044')] -[2024-09-01 09:11:30,069][16868] Signal inference workers to stop experience collection... (1550 times) -[2024-09-01 09:11:30,106][16881] InferenceWorker_p0-w0: stopping experience collection (1550 times) -[2024-09-01 09:11:31,208][16868] Signal inference workers to resume experience collection... (1550 times) -[2024-09-01 09:11:31,209][16881] InferenceWorker_p0-w0: resuming experience collection (1550 times) -[2024-09-01 09:11:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7892992. Throughput: 0: 216.4. Samples: 1590138. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:32,160][00307] Avg episode reward: [(0, '28.303')] -[2024-09-01 09:11:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7897088. Throughput: 0: 229.3. Samples: 1590806. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:37,156][00307] Avg episode reward: [(0, '28.716')] -[2024-09-01 09:11:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7901184. Throughput: 0: 227.9. Samples: 1592304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:42,157][00307] Avg episode reward: [(0, '28.426')] -[2024-09-01 09:11:47,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7905280. Throughput: 0: 219.1. Samples: 1593354. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:47,158][00307] Avg episode reward: [(0, '28.547')] -[2024-09-01 09:11:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7909376. Throughput: 0: 214.6. Samples: 1594076. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:52,164][00307] Avg episode reward: [(0, '27.707')] -[2024-09-01 09:11:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7913472. Throughput: 0: 235.7. Samples: 1596006. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:11:57,158][00307] Avg episode reward: [(0, '27.931')] -[2024-09-01 09:12:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7917568. Throughput: 0: 228.3. Samples: 1596998. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:12:02,156][00307] Avg episode reward: [(0, '28.348')] -[2024-09-01 09:12:03,501][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001934_7921664.pth... -[2024-09-01 09:12:03,681][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001881_7704576.pth -[2024-09-01 09:12:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7921664. Throughput: 0: 216.0. Samples: 1597434. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:12:07,163][00307] Avg episode reward: [(0, '28.028')] -[2024-09-01 09:12:11,892][16881] Updated weights for policy 0, policy_version 1936 (0.0562) -[2024-09-01 09:12:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7929856. Throughput: 0: 230.2. Samples: 1599092. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:12,161][00307] Avg episode reward: [(0, '27.835')] -[2024-09-01 09:12:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7933952. Throughput: 0: 225.4. Samples: 1600280. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:17,161][00307] Avg episode reward: [(0, '27.422')] -[2024-09-01 09:12:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 7933952. Throughput: 0: 224.9. Samples: 1600926. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:22,159][00307] Avg episode reward: [(0, '27.507')] -[2024-09-01 09:12:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.7). Total num frames: 7942144. Throughput: 0: 219.3. Samples: 1602172. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:27,159][00307] Avg episode reward: [(0, '27.579')] -[2024-09-01 09:12:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7946240. Throughput: 0: 237.2. Samples: 1604030. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:32,163][00307] Avg episode reward: [(0, '26.427')] -[2024-09-01 09:12:37,161][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7950336. Throughput: 0: 226.2. Samples: 1604256. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:37,172][00307] Avg episode reward: [(0, '26.634')] -[2024-09-01 09:12:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7954432. Throughput: 0: 210.2. Samples: 1605466. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:42,161][00307] Avg episode reward: [(0, '26.820')] -[2024-09-01 09:12:47,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7958528. Throughput: 0: 225.6. Samples: 1607152. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:47,157][00307] Avg episode reward: [(0, '27.214')] -[2024-09-01 09:12:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7962624. Throughput: 0: 234.2. Samples: 1607974. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:52,160][00307] Avg episode reward: [(0, '27.488')] -[2024-09-01 09:12:57,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 7966720. Throughput: 0: 217.5. Samples: 1608882. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:12:57,167][00307] Avg episode reward: [(0, '27.636')] -[2024-09-01 09:12:58,642][16881] Updated weights for policy 0, policy_version 1946 (0.0539) -[2024-09-01 09:13:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7970816. Throughput: 0: 222.6. Samples: 1610298. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:13:02,164][00307] Avg episode reward: [(0, '27.700')] -[2024-09-01 09:13:07,156][00307] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 7979008. Throughput: 0: 230.5. Samples: 1611298. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:07,159][00307] Avg episode reward: [(0, '27.622')] -[2024-09-01 09:13:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 7983104. Throughput: 0: 225.0. Samples: 1612296. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:12,160][00307] Avg episode reward: [(0, '26.928')] -[2024-09-01 09:13:17,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7987200. Throughput: 0: 208.5. Samples: 1613412. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:13:17,156][00307] Avg episode reward: [(0, '27.018')] -[2024-09-01 09:13:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 7991296. Throughput: 0: 224.3. Samples: 1614350. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:13:22,160][00307] Avg episode reward: [(0, '27.584')] -[2024-09-01 09:13:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7995392. Throughput: 0: 225.0. Samples: 1615590. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:27,157][00307] Avg episode reward: [(0, '27.467')] -[2024-09-01 09:13:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 7999488. Throughput: 0: 212.8. Samples: 1616728. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:32,161][00307] Avg episode reward: [(0, '27.533')] -[2024-09-01 09:13:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 8003584. Throughput: 0: 210.4. Samples: 1617444. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:37,163][00307] Avg episode reward: [(0, '27.829')] -[2024-09-01 09:13:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8007680. Throughput: 0: 226.6. Samples: 1619080. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:42,165][00307] Avg episode reward: [(0, '27.137')] -[2024-09-01 09:13:43,669][16881] Updated weights for policy 0, policy_version 1956 (0.1113) -[2024-09-01 09:13:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8011776. Throughput: 0: 223.4. Samples: 1620352. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:13:47,161][00307] Avg episode reward: [(0, '27.181')] -[2024-09-01 09:13:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8015872. Throughput: 0: 208.5. Samples: 1620680. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:13:52,157][00307] Avg episode reward: [(0, '26.735')] -[2024-09-01 09:13:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8019968. Throughput: 0: 227.5. Samples: 1622532. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:13:57,157][00307] Avg episode reward: [(0, '26.667')] -[2024-09-01 09:14:01,796][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001960_8028160.pth... -[2024-09-01 09:14:01,916][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001907_7811072.pth -[2024-09-01 09:14:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8028160. Throughput: 0: 227.8. Samples: 1623664. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:14:02,157][00307] Avg episode reward: [(0, '27.241')] -[2024-09-01 09:14:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8028160. Throughput: 0: 222.6. Samples: 1624366. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:14:07,157][00307] Avg episode reward: [(0, '26.859')] -[2024-09-01 09:14:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8036352. Throughput: 0: 224.0. Samples: 1625668. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:14:12,168][00307] Avg episode reward: [(0, '26.896')] -[2024-09-01 09:14:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8040448. Throughput: 0: 231.5. Samples: 1627146. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:14:17,157][00307] Avg episode reward: [(0, '27.116')] -[2024-09-01 09:14:22,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 8044544. Throughput: 0: 228.5. Samples: 1627728. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:14:22,167][00307] Avg episode reward: [(0, '27.097')] -[2024-09-01 09:14:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8048640. Throughput: 0: 215.9. Samples: 1628796. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:14:27,156][00307] Avg episode reward: [(0, '27.519')] -[2024-09-01 09:14:30,892][16881] Updated weights for policy 0, policy_version 1966 (0.1103) -[2024-09-01 09:14:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8052736. Throughput: 0: 221.5. Samples: 1630320. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:14:32,162][00307] Avg episode reward: [(0, '27.071')] -[2024-09-01 09:14:37,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8056832. Throughput: 0: 228.3. Samples: 1630954. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:14:37,163][00307] Avg episode reward: [(0, '26.826')] -[2024-09-01 09:14:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8060928. Throughput: 0: 215.5. Samples: 1632230. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:14:42,159][00307] Avg episode reward: [(0, '27.180')] -[2024-09-01 09:14:47,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8065024. Throughput: 0: 222.3. Samples: 1633668. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:14:47,165][00307] Avg episode reward: [(0, '27.041')] -[2024-09-01 09:14:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8069120. Throughput: 0: 216.5. Samples: 1634110. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:14:52,159][00307] Avg episode reward: [(0, '26.526')] -[2024-09-01 09:14:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8073216. Throughput: 0: 222.5. Samples: 1635680. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:14:57,159][00307] Avg episode reward: [(0, '25.640')] -[2024-09-01 09:15:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8077312. Throughput: 0: 215.4. Samples: 1636840. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:02,163][00307] Avg episode reward: [(0, '26.039')] -[2024-09-01 09:15:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8081408. Throughput: 0: 219.2. Samples: 1637590. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:07,157][00307] Avg episode reward: [(0, '26.215')] -[2024-09-01 09:15:12,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8089600. Throughput: 0: 226.5. Samples: 1638988. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:12,160][00307] Avg episode reward: [(0, '26.094')] -[2024-09-01 09:15:17,007][16881] Updated weights for policy 0, policy_version 1976 (0.2577) -[2024-09-01 09:15:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8093696. Throughput: 0: 215.8. Samples: 1640030. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:17,159][00307] Avg episode reward: [(0, '25.882')] -[2024-09-01 09:15:20,595][16868] Signal inference workers to stop experience collection... (1600 times) -[2024-09-01 09:15:20,640][16881] InferenceWorker_p0-w0: stopping experience collection (1600 times) -[2024-09-01 09:15:21,837][16868] Signal inference workers to resume experience collection... (1600 times) -[2024-09-01 09:15:21,839][16881] InferenceWorker_p0-w0: resuming experience collection (1600 times) -[2024-09-01 09:15:22,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8097792. Throughput: 0: 218.2. Samples: 1640770. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:22,164][00307] Avg episode reward: [(0, '25.849')] -[2024-09-01 09:15:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8101888. Throughput: 0: 221.5. Samples: 1642198. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:27,161][00307] Avg episode reward: [(0, '25.912')] -[2024-09-01 09:15:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8105984. Throughput: 0: 224.5. Samples: 1643772. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:32,157][00307] Avg episode reward: [(0, '25.595')] -[2024-09-01 09:15:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8110080. Throughput: 0: 222.4. Samples: 1644116. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:37,156][00307] Avg episode reward: [(0, '25.780')] -[2024-09-01 09:15:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8114176. Throughput: 0: 220.4. Samples: 1645596. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:42,162][00307] Avg episode reward: [(0, '25.625')] -[2024-09-01 09:15:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8118272. Throughput: 0: 228.4. Samples: 1647116. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:47,162][00307] Avg episode reward: [(0, '25.909')] -[2024-09-01 09:15:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8122368. Throughput: 0: 224.5. Samples: 1647692. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:15:52,159][00307] Avg episode reward: [(0, '25.674')] -[2024-09-01 09:15:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8126464. Throughput: 0: 221.9. Samples: 1648972. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:15:57,157][00307] Avg episode reward: [(0, '25.921')] -[2024-09-01 09:16:01,573][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001986_8134656.pth... -[2024-09-01 09:16:01,590][16881] Updated weights for policy 0, policy_version 1986 (0.1196) -[2024-09-01 09:16:01,686][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001934_7921664.pth -[2024-09-01 09:16:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8134656. Throughput: 0: 230.2. Samples: 1650390. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:16:02,157][00307] Avg episode reward: [(0, '26.237')] -[2024-09-01 09:16:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8138752. Throughput: 0: 233.0. Samples: 1651256. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:16:07,161][00307] Avg episode reward: [(0, '25.898')] -[2024-09-01 09:16:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 8138752. Throughput: 0: 225.2. Samples: 1652334. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:16:12,161][00307] Avg episode reward: [(0, '26.358')] -[2024-09-01 09:16:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8146944. Throughput: 0: 216.0. Samples: 1653492. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:16:17,161][00307] Avg episode reward: [(0, '26.611')] -[2024-09-01 09:16:22,161][00307] Fps is (10 sec: 1228.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8151040. Throughput: 0: 227.8. Samples: 1654368. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:16:22,169][00307] Avg episode reward: [(0, '26.536')] -[2024-09-01 09:16:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8155136. Throughput: 0: 222.2. Samples: 1655594. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:16:27,167][00307] Avg episode reward: [(0, '26.907')] -[2024-09-01 09:16:32,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8159232. Throughput: 0: 220.1. Samples: 1657022. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:16:32,165][00307] Avg episode reward: [(0, '26.501')] -[2024-09-01 09:16:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8163328. Throughput: 0: 217.6. Samples: 1657486. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:16:37,157][00307] Avg episode reward: [(0, '26.776')] -[2024-09-01 09:16:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8167424. Throughput: 0: 226.5. Samples: 1659164. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:16:42,163][00307] Avg episode reward: [(0, '26.607')] -[2024-09-01 09:16:47,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8171520. Throughput: 0: 220.9. Samples: 1660332. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:16:47,163][00307] Avg episode reward: [(0, '26.505')] -[2024-09-01 09:16:49,057][16881] Updated weights for policy 0, policy_version 1996 (0.2063) -[2024-09-01 09:16:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8175616. Throughput: 0: 214.8. Samples: 1660922. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:16:52,164][00307] Avg episode reward: [(0, '27.630')] -[2024-09-01 09:16:57,154][00307] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8183808. Throughput: 0: 226.0. Samples: 1662504. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:16:57,156][00307] Avg episode reward: [(0, '27.836')] -[2024-09-01 09:17:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8187904. Throughput: 0: 223.6. Samples: 1663552. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:17:02,160][00307] Avg episode reward: [(0, '27.592')] -[2024-09-01 09:17:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 8187904. Throughput: 0: 219.9. Samples: 1664260. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:17:07,164][00307] Avg episode reward: [(0, '27.874')] -[2024-09-01 09:17:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8196096. Throughput: 0: 221.7. Samples: 1665572. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:17:12,165][00307] Avg episode reward: [(0, '27.730')] -[2024-09-01 09:17:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8200192. Throughput: 0: 228.1. Samples: 1667286. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:17:17,163][00307] Avg episode reward: [(0, '28.558')] -[2024-09-01 09:17:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 8204288. Throughput: 0: 224.8. Samples: 1667600. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:17:22,157][00307] Avg episode reward: [(0, '29.517')] -[2024-09-01 09:17:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8208384. Throughput: 0: 210.2. Samples: 1668624. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:17:27,164][00307] Avg episode reward: [(0, '29.517')] -[2024-09-01 09:17:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8212480. Throughput: 0: 229.8. Samples: 1670674. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:17:32,157][00307] Avg episode reward: [(0, '30.082')] -[2024-09-01 09:17:33,694][16881] Updated weights for policy 0, policy_version 2006 (0.2040) -[2024-09-01 09:17:37,162][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 888.6). Total num frames: 8216576. Throughput: 0: 224.7. Samples: 1671034. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:17:37,165][00307] Avg episode reward: [(0, '29.958')] -[2024-09-01 09:17:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8220672. Throughput: 0: 215.5. Samples: 1672200. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:17:42,158][00307] Avg episode reward: [(0, '30.680')] -[2024-09-01 09:17:43,933][16868] Saving new best policy, reward=30.680! -[2024-09-01 09:17:47,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8224768. Throughput: 0: 225.9. Samples: 1673716. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:17:47,162][00307] Avg episode reward: [(0, '29.704')] -[2024-09-01 09:17:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8232960. Throughput: 0: 226.1. Samples: 1674434. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:17:52,162][00307] Avg episode reward: [(0, '29.909')] -[2024-09-01 09:17:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8232960. Throughput: 0: 227.2. Samples: 1675794. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:17:57,163][00307] Avg episode reward: [(0, '30.292')] -[2024-09-01 09:18:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 8237056. Throughput: 0: 213.2. Samples: 1676878. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:18:02,157][00307] Avg episode reward: [(0, '30.016')] -[2024-09-01 09:18:02,288][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002012_8241152.pth... -[2024-09-01 09:18:02,398][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001960_8028160.pth -[2024-09-01 09:18:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8245248. Throughput: 0: 229.4. Samples: 1677922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:18:07,164][00307] Avg episode reward: [(0, '29.551')] -[2024-09-01 09:18:12,160][00307] Fps is (10 sec: 1228.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8249344. Throughput: 0: 232.0. Samples: 1679064. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:18:12,163][00307] Avg episode reward: [(0, '29.682')] -[2024-09-01 09:18:17,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8253440. Throughput: 0: 209.1. Samples: 1680082. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:18:17,162][00307] Avg episode reward: [(0, '29.960')] -[2024-09-01 09:18:21,078][16881] Updated weights for policy 0, policy_version 2016 (0.0559) -[2024-09-01 09:18:22,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8257536. Throughput: 0: 222.4. Samples: 1681042. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:18:22,157][00307] Avg episode reward: [(0, '30.410')] -[2024-09-01 09:18:27,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8261632. Throughput: 0: 228.7. Samples: 1682492. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:18:27,157][00307] Avg episode reward: [(0, '30.445')] -[2024-09-01 09:18:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8265728. Throughput: 0: 222.0. Samples: 1683706. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:18:32,157][00307] Avg episode reward: [(0, '30.200')] -[2024-09-01 09:18:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 8269824. Throughput: 0: 216.8. Samples: 1684192. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:18:37,157][00307] Avg episode reward: [(0, '29.977')] -[2024-09-01 09:18:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8273920. Throughput: 0: 223.0. Samples: 1685830. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:18:42,161][00307] Avg episode reward: [(0, '29.986')] -[2024-09-01 09:18:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8278016. Throughput: 0: 230.2. Samples: 1687238. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:18:47,157][00307] Avg episode reward: [(0, '29.669')] -[2024-09-01 09:18:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8282112. Throughput: 0: 214.4. Samples: 1687570. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:18:52,160][00307] Avg episode reward: [(0, '29.304')] -[2024-09-01 09:18:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8286208. Throughput: 0: 218.3. Samples: 1688886. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:18:57,158][00307] Avg episode reward: [(0, '29.833')] -[2024-09-01 09:19:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8294400. Throughput: 0: 209.2. Samples: 1689496. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:19:02,161][00307] Avg episode reward: [(0, '30.240')] -[2024-09-01 09:19:07,149][16881] Updated weights for policy 0, policy_version 2026 (0.1600) -[2024-09-01 09:19:07,165][00307] Fps is (10 sec: 1227.5, 60 sec: 887.3, 300 sec: 888.6). Total num frames: 8298496. Throughput: 0: 222.2. Samples: 1691042. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:19:07,176][00307] Avg episode reward: [(0, '29.187')] -[2024-09-01 09:19:10,955][16868] Signal inference workers to stop experience collection... (1650 times) -[2024-09-01 09:19:11,061][16881] InferenceWorker_p0-w0: stopping experience collection (1650 times) -[2024-09-01 09:19:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 8298496. Throughput: 0: 215.7. Samples: 1692198. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:19:12,160][00307] Avg episode reward: [(0, '28.363')] -[2024-09-01 09:19:12,496][16868] Signal inference workers to resume experience collection... (1650 times) -[2024-09-01 09:19:12,497][16881] InferenceWorker_p0-w0: resuming experience collection (1650 times) -[2024-09-01 09:19:17,154][00307] Fps is (10 sec: 820.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8306688. Throughput: 0: 218.4. Samples: 1693536. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:19:17,159][00307] Avg episode reward: [(0, '27.892')] -[2024-09-01 09:19:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8310784. Throughput: 0: 223.9. Samples: 1694266. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:19:22,158][00307] Avg episode reward: [(0, '27.666')] -[2024-09-01 09:19:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8314880. Throughput: 0: 210.4. Samples: 1695296. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:19:27,156][00307] Avg episode reward: [(0, '26.987')] -[2024-09-01 09:19:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8318976. Throughput: 0: 210.4. Samples: 1696708. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:19:32,156][00307] Avg episode reward: [(0, '26.763')] -[2024-09-01 09:19:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8323072. Throughput: 0: 216.7. Samples: 1697320. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:19:37,157][00307] Avg episode reward: [(0, '25.960')] -[2024-09-01 09:19:42,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8327168. Throughput: 0: 223.2. Samples: 1698930. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:19:42,161][00307] Avg episode reward: [(0, '25.452')] -[2024-09-01 09:19:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8331264. Throughput: 0: 230.6. Samples: 1699872. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:19:47,157][00307] Avg episode reward: [(0, '25.569')] -[2024-09-01 09:19:52,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8335360. Throughput: 0: 211.3. Samples: 1700550. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:19:52,157][00307] Avg episode reward: [(0, '25.726')] -[2024-09-01 09:19:53,699][16881] Updated weights for policy 0, policy_version 2036 (0.1042) -[2024-09-01 09:19:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8339456. Throughput: 0: 228.0. Samples: 1702458. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:19:57,158][00307] Avg episode reward: [(0, '25.302')] -[2024-09-01 09:20:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8343552. Throughput: 0: 219.7. Samples: 1703424. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:20:02,157][00307] Avg episode reward: [(0, '24.625')] -[2024-09-01 09:20:03,501][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002038_8347648.pth... -[2024-09-01 09:20:03,650][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001986_8134656.pth -[2024-09-01 09:20:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 8347648. Throughput: 0: 213.1. Samples: 1703854. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:20:07,156][00307] Avg episode reward: [(0, '24.863')] -[2024-09-01 09:20:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8355840. Throughput: 0: 227.3. Samples: 1705524. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:20:12,163][00307] Avg episode reward: [(0, '24.928')] -[2024-09-01 09:20:17,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8359936. Throughput: 0: 222.1. Samples: 1706702. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:20:17,158][00307] Avg episode reward: [(0, '24.742')] -[2024-09-01 09:20:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 8359936. Throughput: 0: 224.0. Samples: 1707402. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:20:22,157][00307] Avg episode reward: [(0, '24.803')] -[2024-09-01 09:20:27,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8368128. Throughput: 0: 213.6. Samples: 1708542. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:20:27,157][00307] Avg episode reward: [(0, '24.430')] -[2024-09-01 09:20:32,157][00307] Fps is (10 sec: 1228.4, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8372224. Throughput: 0: 235.1. Samples: 1710454. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:20:32,164][00307] Avg episode reward: [(0, '24.251')] -[2024-09-01 09:20:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8376320. Throughput: 0: 225.1. Samples: 1710680. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:20:37,159][00307] Avg episode reward: [(0, '24.114')] -[2024-09-01 09:20:40,334][16881] Updated weights for policy 0, policy_version 2046 (0.1737) -[2024-09-01 09:20:42,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8380416. Throughput: 0: 210.4. Samples: 1711928. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:20:42,170][00307] Avg episode reward: [(0, '24.201')] -[2024-09-01 09:20:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8384512. Throughput: 0: 230.1. Samples: 1713780. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:20:47,162][00307] Avg episode reward: [(0, '24.032')] -[2024-09-01 09:20:52,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8388608. Throughput: 0: 230.9. Samples: 1714244. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:20:52,158][00307] Avg episode reward: [(0, '23.704')] -[2024-09-01 09:20:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8392704. Throughput: 0: 219.7. Samples: 1715410. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:20:57,160][00307] Avg episode reward: [(0, '24.588')] -[2024-09-01 09:21:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8396800. Throughput: 0: 227.2. Samples: 1716926. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:21:02,168][00307] Avg episode reward: [(0, '24.082')] -[2024-09-01 09:21:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8404992. Throughput: 0: 232.8. Samples: 1717876. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:07,157][00307] Avg episode reward: [(0, '24.229')] -[2024-09-01 09:21:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8409088. Throughput: 0: 232.5. Samples: 1719004. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:12,157][00307] Avg episode reward: [(0, '24.406')] -[2024-09-01 09:21:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.8). Total num frames: 8409088. Throughput: 0: 213.7. Samples: 1720070. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:17,156][00307] Avg episode reward: [(0, '24.369')] -[2024-09-01 09:21:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8417280. Throughput: 0: 231.9. Samples: 1721114. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:22,156][00307] Avg episode reward: [(0, '24.529')] -[2024-09-01 09:21:25,386][16881] Updated weights for policy 0, policy_version 2056 (0.1044) -[2024-09-01 09:21:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8421376. Throughput: 0: 230.5. Samples: 1722302. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 09:21:27,160][00307] Avg episode reward: [(0, '25.417')] -[2024-09-01 09:21:32,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8425472. Throughput: 0: 216.3. Samples: 1723512. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 09:21:32,159][00307] Avg episode reward: [(0, '25.905')] -[2024-09-01 09:21:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8429568. Throughput: 0: 218.4. Samples: 1724074. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 09:21:37,158][00307] Avg episode reward: [(0, '25.839')] -[2024-09-01 09:21:42,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8433664. Throughput: 0: 228.5. Samples: 1725692. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) -[2024-09-01 09:21:42,163][00307] Avg episode reward: [(0, '27.099')] -[2024-09-01 09:21:47,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8437760. Throughput: 0: 226.9. Samples: 1727138. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:47,168][00307] Avg episode reward: [(0, '27.473')] -[2024-09-01 09:21:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8441856. Throughput: 0: 210.4. Samples: 1727346. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:52,162][00307] Avg episode reward: [(0, '27.814')] -[2024-09-01 09:21:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8445952. Throughput: 0: 224.2. Samples: 1729092. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:21:57,162][00307] Avg episode reward: [(0, '27.899')] -[2024-09-01 09:22:01,231][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002064_8454144.pth... -[2024-09-01 09:22:01,343][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002012_8241152.pth -[2024-09-01 09:22:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8454144. Throughput: 0: 231.5. Samples: 1730486. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:22:02,156][00307] Avg episode reward: [(0, '28.539')] -[2024-09-01 09:22:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 8454144. Throughput: 0: 224.0. Samples: 1731196. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:22:07,156][00307] Avg episode reward: [(0, '28.785')] -[2024-09-01 09:22:11,690][16881] Updated weights for policy 0, policy_version 2066 (0.1730) -[2024-09-01 09:22:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8462336. Throughput: 0: 222.3. Samples: 1732306. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:12,157][00307] Avg episode reward: [(0, '29.355')] -[2024-09-01 09:22:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8466432. Throughput: 0: 230.3. Samples: 1733874. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:17,158][00307] Avg episode reward: [(0, '28.943')] -[2024-09-01 09:22:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8470528. Throughput: 0: 229.4. Samples: 1734396. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:22,160][00307] Avg episode reward: [(0, '29.031')] -[2024-09-01 09:22:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8474624. Throughput: 0: 216.3. Samples: 1735426. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:27,163][00307] Avg episode reward: [(0, '28.447')] -[2024-09-01 09:22:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8478720. Throughput: 0: 226.0. Samples: 1737306. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:22:32,157][00307] Avg episode reward: [(0, '28.718')] -[2024-09-01 09:22:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8482816. Throughput: 0: 230.5. Samples: 1737720. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:22:37,156][00307] Avg episode reward: [(0, '28.642')] -[2024-09-01 09:22:42,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8486912. Throughput: 0: 221.5. Samples: 1739062. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:42,163][00307] Avg episode reward: [(0, '28.361')] -[2024-09-01 09:22:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 8491008. Throughput: 0: 222.2. Samples: 1740484. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:47,163][00307] Avg episode reward: [(0, '28.485')] -[2024-09-01 09:22:52,154][00307] Fps is (10 sec: 1229.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8499200. Throughput: 0: 222.8. Samples: 1741222. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:22:52,165][00307] Avg episode reward: [(0, '28.900')] -[2024-09-01 09:22:57,071][16881] Updated weights for policy 0, policy_version 2076 (0.1057) -[2024-09-01 09:22:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8503296. Throughput: 0: 227.5. Samples: 1742544. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:22:57,167][00307] Avg episode reward: [(0, '29.124')] -[2024-09-01 09:23:01,173][16868] Signal inference workers to stop experience collection... (1700 times) -[2024-09-01 09:23:01,268][16881] InferenceWorker_p0-w0: stopping experience collection (1700 times) -[2024-09-01 09:23:02,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 8503296. Throughput: 0: 214.4. Samples: 1743520. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:23:02,161][00307] Avg episode reward: [(0, '28.298')] -[2024-09-01 09:23:02,539][16868] Signal inference workers to resume experience collection... (1700 times) -[2024-09-01 09:23:02,540][16881] InferenceWorker_p0-w0: resuming experience collection (1700 times) -[2024-09-01 09:23:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8511488. Throughput: 0: 225.7. Samples: 1744552. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:23:07,157][00307] Avg episode reward: [(0, '28.167')] -[2024-09-01 09:23:12,155][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8515584. Throughput: 0: 228.4. Samples: 1745704. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:23:12,159][00307] Avg episode reward: [(0, '28.075')] -[2024-09-01 09:23:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8519680. Throughput: 0: 216.1. Samples: 1747030. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:23:17,161][00307] Avg episode reward: [(0, '28.268')] -[2024-09-01 09:23:22,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8523776. Throughput: 0: 219.2. Samples: 1747582. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:23:22,157][00307] Avg episode reward: [(0, '28.255')] -[2024-09-01 09:23:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8527872. Throughput: 0: 225.8. Samples: 1749220. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:23:27,166][00307] Avg episode reward: [(0, '27.026')] -[2024-09-01 09:23:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8531968. Throughput: 0: 225.1. Samples: 1750614. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:23:32,157][00307] Avg episode reward: [(0, '26.060')] -[2024-09-01 09:23:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8536064. Throughput: 0: 217.1. Samples: 1750990. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:23:37,161][00307] Avg episode reward: [(0, '26.321')] -[2024-09-01 09:23:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 8540160. Throughput: 0: 222.5. Samples: 1752556. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:23:42,164][00307] Avg episode reward: [(0, '25.631')] -[2024-09-01 09:23:43,302][16881] Updated weights for policy 0, policy_version 2086 (0.2046) -[2024-09-01 09:23:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8548352. Throughput: 0: 228.2. Samples: 1753790. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:23:47,162][00307] Avg episode reward: [(0, '24.553')] -[2024-09-01 09:23:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8552448. Throughput: 0: 222.9. Samples: 1754584. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:23:52,157][00307] Avg episode reward: [(0, '23.980')] -[2024-09-01 09:23:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8556544. Throughput: 0: 221.9. Samples: 1755688. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:23:57,161][00307] Avg episode reward: [(0, '24.007')] -[2024-09-01 09:24:00,951][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002090_8560640.pth... -[2024-09-01 09:24:01,061][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002038_8347648.pth -[2024-09-01 09:24:02,159][00307] Fps is (10 sec: 818.8, 60 sec: 955.6, 300 sec: 888.6). Total num frames: 8560640. Throughput: 0: 224.9. Samples: 1757152. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:24:02,163][00307] Avg episode reward: [(0, '23.681')] -[2024-09-01 09:24:07,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 8564736. Throughput: 0: 226.2. Samples: 1757762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:24:07,163][00307] Avg episode reward: [(0, '23.437')] -[2024-09-01 09:24:12,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8568832. Throughput: 0: 213.3. Samples: 1758818. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:24:12,157][00307] Avg episode reward: [(0, '23.437')] -[2024-09-01 09:24:17,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8572928. Throughput: 0: 217.2. Samples: 1760390. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:24:17,162][00307] Avg episode reward: [(0, '24.313')] -[2024-09-01 09:24:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8577024. Throughput: 0: 223.7. Samples: 1761058. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:24:22,162][00307] Avg episode reward: [(0, '24.393')] -[2024-09-01 09:24:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8581120. Throughput: 0: 219.7. Samples: 1762442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:24:27,157][00307] Avg episode reward: [(0, '24.393')] -[2024-09-01 09:24:28,796][16881] Updated weights for policy 0, policy_version 2096 (0.0058) -[2024-09-01 09:24:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8585216. Throughput: 0: 221.4. Samples: 1763754. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:24:32,160][00307] Avg episode reward: [(0, '24.535')] -[2024-09-01 09:24:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8589312. Throughput: 0: 215.5. Samples: 1764280. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:24:37,162][00307] Avg episode reward: [(0, '24.171')] -[2024-09-01 09:24:42,154][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8597504. Throughput: 0: 225.6. Samples: 1765842. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:24:42,161][00307] Avg episode reward: [(0, '24.402')] -[2024-09-01 09:24:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8597504. Throughput: 0: 215.8. Samples: 1766862. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:24:47,157][00307] Avg episode reward: [(0, '24.606')] -[2024-09-01 09:24:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8605696. Throughput: 0: 221.9. Samples: 1767744. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:24:52,156][00307] Avg episode reward: [(0, '24.421')] -[2024-09-01 09:24:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8609792. Throughput: 0: 228.6. Samples: 1769104. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:24:57,158][00307] Avg episode reward: [(0, '24.605')] -[2024-09-01 09:25:02,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8613888. Throughput: 0: 224.3. Samples: 1770484. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:02,162][00307] Avg episode reward: [(0, '24.562')] -[2024-09-01 09:25:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 8617984. Throughput: 0: 218.9. Samples: 1770910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:25:07,159][00307] Avg episode reward: [(0, '24.503')] -[2024-09-01 09:25:12,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8622080. Throughput: 0: 219.5. Samples: 1772320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:25:12,157][00307] Avg episode reward: [(0, '24.411')] -[2024-09-01 09:25:13,514][16881] Updated weights for policy 0, policy_version 2106 (0.1031) -[2024-09-01 09:25:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8626176. Throughput: 0: 228.0. Samples: 1774016. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:17,158][00307] Avg episode reward: [(0, '24.548')] -[2024-09-01 09:25:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8630272. Throughput: 0: 226.2. Samples: 1774460. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:22,159][00307] Avg episode reward: [(0, '24.821')] -[2024-09-01 09:25:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8634368. Throughput: 0: 224.1. Samples: 1775926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:27,157][00307] Avg episode reward: [(0, '24.900')] -[2024-09-01 09:25:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8642560. Throughput: 0: 232.0. Samples: 1777300. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:32,163][00307] Avg episode reward: [(0, '25.229')] -[2024-09-01 09:25:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8646656. Throughput: 0: 230.4. Samples: 1778112. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:37,159][00307] Avg episode reward: [(0, '25.353')] -[2024-09-01 09:25:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8650752. Throughput: 0: 221.9. Samples: 1779088. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:42,165][00307] Avg episode reward: [(0, '25.560')] -[2024-09-01 09:25:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8654848. Throughput: 0: 226.4. Samples: 1780670. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:25:47,156][00307] Avg episode reward: [(0, '26.198')] -[2024-09-01 09:25:52,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8658944. Throughput: 0: 228.9. Samples: 1781212. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:25:52,162][00307] Avg episode reward: [(0, '27.225')] -[2024-09-01 09:25:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8663040. Throughput: 0: 226.0. Samples: 1782492. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:25:57,162][00307] Avg episode reward: [(0, '27.055')] -[2024-09-01 09:26:00,133][16881] Updated weights for policy 0, policy_version 2116 (0.1176) -[2024-09-01 09:26:00,145][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002116_8667136.pth... -[2024-09-01 09:26:00,256][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002064_8454144.pth -[2024-09-01 09:26:02,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8667136. Throughput: 0: 222.7. Samples: 1784038. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:26:02,159][00307] Avg episode reward: [(0, '26.529')] -[2024-09-01 09:26:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8671232. Throughput: 0: 226.4. Samples: 1784648. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:26:07,162][00307] Avg episode reward: [(0, '26.367')] -[2024-09-01 09:26:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8675328. Throughput: 0: 227.8. Samples: 1786176. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:26:12,160][00307] Avg episode reward: [(0, '26.409')] -[2024-09-01 09:26:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8679424. Throughput: 0: 222.0. Samples: 1787292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:26:17,161][00307] Avg episode reward: [(0, '26.009')] -[2024-09-01 09:26:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8683520. Throughput: 0: 219.8. Samples: 1788002. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:26:22,157][00307] Avg episode reward: [(0, '25.936')] -[2024-09-01 09:26:27,159][00307] Fps is (10 sec: 1228.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8691712. Throughput: 0: 228.9. Samples: 1789390. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:26:27,164][00307] Avg episode reward: [(0, '25.956')] -[2024-09-01 09:26:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8695808. Throughput: 0: 217.5. Samples: 1790456. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:26:32,158][00307] Avg episode reward: [(0, '25.933')] -[2024-09-01 09:26:37,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8699904. Throughput: 0: 223.2. Samples: 1791256. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:26:37,164][00307] Avg episode reward: [(0, '25.805')] -[2024-09-01 09:26:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8704000. Throughput: 0: 226.8. Samples: 1792698. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:26:42,166][00307] Avg episode reward: [(0, '25.951')] -[2024-09-01 09:26:44,404][16881] Updated weights for policy 0, policy_version 2126 (0.0553) -[2024-09-01 09:26:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8708096. Throughput: 0: 224.5. Samples: 1794142. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:26:47,157][00307] Avg episode reward: [(0, '26.459')] -[2024-09-01 09:26:48,811][16868] Signal inference workers to stop experience collection... (1750 times) -[2024-09-01 09:26:48,865][16881] InferenceWorker_p0-w0: stopping experience collection (1750 times) -[2024-09-01 09:26:50,273][16868] Signal inference workers to resume experience collection... (1750 times) -[2024-09-01 09:26:50,274][16881] InferenceWorker_p0-w0: resuming experience collection (1750 times) -[2024-09-01 09:26:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8712192. Throughput: 0: 219.8. Samples: 1794538. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:26:52,160][00307] Avg episode reward: [(0, '26.424')] -[2024-09-01 09:26:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8716288. Throughput: 0: 219.0. Samples: 1796030. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:26:57,163][00307] Avg episode reward: [(0, '27.100')] -[2024-09-01 09:27:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8720384. Throughput: 0: 230.0. Samples: 1797640. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:27:02,157][00307] Avg episode reward: [(0, '27.100')] -[2024-09-01 09:27:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8724480. Throughput: 0: 225.5. Samples: 1798148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:07,159][00307] Avg episode reward: [(0, '27.594')] -[2024-09-01 09:27:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8728576. Throughput: 0: 221.6. Samples: 1799360. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:12,161][00307] Avg episode reward: [(0, '27.126')] -[2024-09-01 09:27:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8736768. Throughput: 0: 213.6. Samples: 1800066. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:17,156][00307] Avg episode reward: [(0, '27.288')] -[2024-09-01 09:27:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8740864. Throughput: 0: 232.2. Samples: 1801706. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:27:22,156][00307] Avg episode reward: [(0, '27.549')] -[2024-09-01 09:27:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 888.6). Total num frames: 8740864. Throughput: 0: 222.8. Samples: 1802722. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:27:27,159][00307] Avg episode reward: [(0, '27.239')] -[2024-09-01 09:27:31,072][16881] Updated weights for policy 0, policy_version 2136 (0.0575) -[2024-09-01 09:27:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8749056. Throughput: 0: 222.4. Samples: 1804152. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:32,163][00307] Avg episode reward: [(0, '26.838')] -[2024-09-01 09:27:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8753152. Throughput: 0: 228.1. Samples: 1804804. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:37,157][00307] Avg episode reward: [(0, '26.703')] -[2024-09-01 09:27:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8757248. Throughput: 0: 224.8. Samples: 1806148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:42,163][00307] Avg episode reward: [(0, '27.299')] -[2024-09-01 09:27:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8761344. Throughput: 0: 220.1. Samples: 1807546. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:47,157][00307] Avg episode reward: [(0, '27.096')] -[2024-09-01 09:27:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8765440. Throughput: 0: 222.8. Samples: 1808172. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:52,165][00307] Avg episode reward: [(0, '27.169')] -[2024-09-01 09:27:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8769536. Throughput: 0: 234.4. Samples: 1809906. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:27:57,158][00307] Avg episode reward: [(0, '26.974')] -[2024-09-01 09:28:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8773632. Throughput: 0: 240.8. Samples: 1810900. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:28:02,160][00307] Avg episode reward: [(0, '28.179')] -[2024-09-01 09:28:03,552][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002143_8777728.pth... -[2024-09-01 09:28:03,670][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002090_8560640.pth -[2024-09-01 09:28:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8777728. Throughput: 0: 219.2. Samples: 1811568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:28:07,160][00307] Avg episode reward: [(0, '28.056')] -[2024-09-01 09:28:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8785920. Throughput: 0: 228.6. Samples: 1813008. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:28:12,157][00307] Avg episode reward: [(0, '28.029')] -[2024-09-01 09:28:16,422][16881] Updated weights for policy 0, policy_version 2146 (0.0040) -[2024-09-01 09:28:17,159][00307] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 8790016. Throughput: 0: 222.7. Samples: 1814174. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:17,166][00307] Avg episode reward: [(0, '27.917')] -[2024-09-01 09:28:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8794112. Throughput: 0: 222.3. Samples: 1814808. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:22,157][00307] Avg episode reward: [(0, '27.917')] -[2024-09-01 09:28:27,154][00307] Fps is (10 sec: 819.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8798208. Throughput: 0: 223.2. Samples: 1816194. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:27,162][00307] Avg episode reward: [(0, '29.303')] -[2024-09-01 09:28:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8802304. Throughput: 0: 230.6. Samples: 1817922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:32,157][00307] Avg episode reward: [(0, '29.183')] -[2024-09-01 09:28:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8806400. Throughput: 0: 221.4. Samples: 1818134. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:37,159][00307] Avg episode reward: [(0, '29.217')] -[2024-09-01 09:28:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8810496. Throughput: 0: 211.5. Samples: 1819424. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:42,171][00307] Avg episode reward: [(0, '29.771')] -[2024-09-01 09:28:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8814592. Throughput: 0: 229.4. Samples: 1821224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:47,157][00307] Avg episode reward: [(0, '29.656')] -[2024-09-01 09:28:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8818688. Throughput: 0: 228.3. Samples: 1821840. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:28:52,157][00307] Avg episode reward: [(0, '29.523')] -[2024-09-01 09:28:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8822784. Throughput: 0: 218.5. Samples: 1822842. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:28:57,166][00307] Avg episode reward: [(0, '29.443')] -[2024-09-01 09:29:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8826880. Throughput: 0: 225.7. Samples: 1824328. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:02,161][00307] Avg episode reward: [(0, '29.185')] -[2024-09-01 09:29:02,900][16881] Updated weights for policy 0, policy_version 2156 (0.2231) -[2024-09-01 09:29:07,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8835072. Throughput: 0: 231.4. Samples: 1825220. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:29:07,159][00307] Avg episode reward: [(0, '29.238')] -[2024-09-01 09:29:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8835072. Throughput: 0: 227.0. Samples: 1826408. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:29:12,161][00307] Avg episode reward: [(0, '28.122')] -[2024-09-01 09:29:17,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8843264. Throughput: 0: 196.3. Samples: 1826754. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:17,167][00307] Avg episode reward: [(0, '27.533')] -[2024-09-01 09:29:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8847360. Throughput: 0: 228.8. Samples: 1828432. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:22,160][00307] Avg episode reward: [(0, '27.675')] -[2024-09-01 09:29:27,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 8851456. Throughput: 0: 226.7. Samples: 1829624. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:27,160][00307] Avg episode reward: [(0, '27.764')] -[2024-09-01 09:29:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8855552. Throughput: 0: 213.7. Samples: 1830842. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:32,158][00307] Avg episode reward: [(0, '27.695')] -[2024-09-01 09:29:37,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8859648. Throughput: 0: 215.3. Samples: 1831528. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:37,157][00307] Avg episode reward: [(0, '28.079')] -[2024-09-01 09:29:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8863744. Throughput: 0: 234.3. Samples: 1833386. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:42,160][00307] Avg episode reward: [(0, '28.685')] -[2024-09-01 09:29:47,157][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 8867840. Throughput: 0: 217.6. Samples: 1834120. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:47,160][00307] Avg episode reward: [(0, '28.648')] -[2024-09-01 09:29:49,699][16881] Updated weights for policy 0, policy_version 2166 (0.1724) -[2024-09-01 09:29:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8871936. Throughput: 0: 210.1. Samples: 1834676. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:52,156][00307] Avg episode reward: [(0, '28.374')] -[2024-09-01 09:29:57,155][00307] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8880128. Throughput: 0: 226.9. Samples: 1836620. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:29:57,161][00307] Avg episode reward: [(0, '28.935')] -[2024-09-01 09:30:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8880128. Throughput: 0: 241.6. Samples: 1837626. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:02,158][00307] Avg episode reward: [(0, '29.219')] -[2024-09-01 09:30:02,200][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002169_8884224.pth... -[2024-09-01 09:30:02,357][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002116_8667136.pth -[2024-09-01 09:30:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8884224. Throughput: 0: 218.4. Samples: 1838260. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:07,162][00307] Avg episode reward: [(0, '29.827')] -[2024-09-01 09:30:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8892416. Throughput: 0: 224.1. Samples: 1839710. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:12,157][00307] Avg episode reward: [(0, '28.831')] -[2024-09-01 09:30:17,157][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 8896512. Throughput: 0: 232.2. Samples: 1841292. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:17,164][00307] Avg episode reward: [(0, '28.879')] -[2024-09-01 09:30:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8900608. Throughput: 0: 227.2. Samples: 1841750. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:22,157][00307] Avg episode reward: [(0, '28.587')] -[2024-09-01 09:30:27,156][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8904704. Throughput: 0: 207.6. Samples: 1842728. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:27,159][00307] Avg episode reward: [(0, '28.505')] -[2024-09-01 09:30:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8908800. Throughput: 0: 231.6. Samples: 1844540. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:32,157][00307] Avg episode reward: [(0, '28.352')] -[2024-09-01 09:30:34,091][16881] Updated weights for policy 0, policy_version 2176 (0.1554) -[2024-09-01 09:30:37,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8912896. Throughput: 0: 233.4. Samples: 1845180. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:30:37,157][00307] Avg episode reward: [(0, '27.960')] -[2024-09-01 09:30:37,759][16868] Signal inference workers to stop experience collection... (1800 times) -[2024-09-01 09:30:37,893][16881] InferenceWorker_p0-w0: stopping experience collection (1800 times) -[2024-09-01 09:30:39,738][16868] Signal inference workers to resume experience collection... (1800 times) -[2024-09-01 09:30:39,739][16881] InferenceWorker_p0-w0: resuming experience collection (1800 times) -[2024-09-01 09:30:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8916992. Throughput: 0: 212.5. Samples: 1846182. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:30:42,160][00307] Avg episode reward: [(0, '27.951')] -[2024-09-01 09:30:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8921088. Throughput: 0: 228.0. Samples: 1847888. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:47,165][00307] Avg episode reward: [(0, '28.012')] -[2024-09-01 09:30:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8925184. Throughput: 0: 229.5. Samples: 1848586. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:52,157][00307] Avg episode reward: [(0, '27.631')] -[2024-09-01 09:30:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 8929280. Throughput: 0: 224.7. Samples: 1849822. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:30:57,157][00307] Avg episode reward: [(0, '28.083')] -[2024-09-01 09:31:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8933376. Throughput: 0: 214.9. Samples: 1850960. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:02,165][00307] Avg episode reward: [(0, '27.889')] -[2024-09-01 09:31:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8941568. Throughput: 0: 224.0. Samples: 1851828. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:07,156][00307] Avg episode reward: [(0, '27.482')] -[2024-09-01 09:31:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8945664. Throughput: 0: 229.7. Samples: 1853064. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:12,163][00307] Avg episode reward: [(0, '27.034')] -[2024-09-01 09:31:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8949760. Throughput: 0: 212.1. Samples: 1854084. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:17,161][00307] Avg episode reward: [(0, '27.052')] -[2024-09-01 09:31:21,254][16881] Updated weights for policy 0, policy_version 2186 (0.0053) -[2024-09-01 09:31:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8953856. Throughput: 0: 219.5. Samples: 1855056. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:22,165][00307] Avg episode reward: [(0, '28.072')] -[2024-09-01 09:31:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8957952. Throughput: 0: 228.0. Samples: 1856442. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:27,161][00307] Avg episode reward: [(0, '27.409')] -[2024-09-01 09:31:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8962048. Throughput: 0: 218.5. Samples: 1857722. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:32,159][00307] Avg episode reward: [(0, '28.140')] -[2024-09-01 09:31:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8966144. Throughput: 0: 212.3. Samples: 1858138. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:37,161][00307] Avg episode reward: [(0, '26.992')] -[2024-09-01 09:31:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8970240. Throughput: 0: 222.8. Samples: 1859846. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:42,165][00307] Avg episode reward: [(0, '26.193')] -[2024-09-01 09:31:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8974336. Throughput: 0: 226.3. Samples: 1861142. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:47,157][00307] Avg episode reward: [(0, '26.224')] -[2024-09-01 09:31:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8978432. Throughput: 0: 217.4. Samples: 1861610. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:52,160][00307] Avg episode reward: [(0, '26.562')] -[2024-09-01 09:31:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8982528. Throughput: 0: 223.8. Samples: 1863136. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:31:57,156][00307] Avg episode reward: [(0, '26.486')] -[2024-09-01 09:32:01,328][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002195_8990720.pth... -[2024-09-01 09:32:01,441][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002143_8777728.pth -[2024-09-01 09:32:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 8990720. Throughput: 0: 231.4. Samples: 1864496. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:32:02,157][00307] Avg episode reward: [(0, '26.759')] -[2024-09-01 09:32:06,463][16881] Updated weights for policy 0, policy_version 2196 (0.0565) -[2024-09-01 09:32:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 8994816. Throughput: 0: 226.8. Samples: 1865264. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:32:07,157][00307] Avg episode reward: [(0, '27.183')] -[2024-09-01 09:32:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 8998912. Throughput: 0: 218.4. Samples: 1866272. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:32:12,162][00307] Avg episode reward: [(0, '27.018')] -[2024-09-01 09:32:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9003008. Throughput: 0: 225.8. Samples: 1867882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:32:17,157][00307] Avg episode reward: [(0, '26.693')] -[2024-09-01 09:32:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9007104. Throughput: 0: 230.6. Samples: 1868514. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:32:22,159][00307] Avg episode reward: [(0, '26.372')] -[2024-09-01 09:32:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9011200. Throughput: 0: 217.6. Samples: 1869636. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:32:27,159][00307] Avg episode reward: [(0, '26.333')] -[2024-09-01 09:32:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9015296. Throughput: 0: 224.0. Samples: 1871224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:32:32,156][00307] Avg episode reward: [(0, '26.309')] -[2024-09-01 09:32:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9019392. Throughput: 0: 228.0. Samples: 1871870. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:32:37,156][00307] Avg episode reward: [(0, '25.472')] -[2024-09-01 09:32:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9023488. Throughput: 0: 228.3. Samples: 1873410. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:32:42,159][00307] Avg episode reward: [(0, '25.935')] -[2024-09-01 09:32:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9027584. Throughput: 0: 221.8. Samples: 1874476. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:32:47,156][00307] Avg episode reward: [(0, '25.345')] -[2024-09-01 09:32:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9031680. Throughput: 0: 216.8. Samples: 1875020. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:32:52,156][00307] Avg episode reward: [(0, '25.437')] -[2024-09-01 09:32:52,219][16881] Updated weights for policy 0, policy_version 2206 (0.1097) -[2024-09-01 09:32:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9039872. Throughput: 0: 228.4. Samples: 1876548. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:32:57,156][00307] Avg episode reward: [(0, '26.056')] -[2024-09-01 09:33:02,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9039872. Throughput: 0: 216.0. Samples: 1877602. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:33:02,162][00307] Avg episode reward: [(0, '26.099')] -[2024-09-01 09:33:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9048064. Throughput: 0: 220.2. Samples: 1878422. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:07,156][00307] Avg episode reward: [(0, '26.163')] -[2024-09-01 09:33:12,154][00307] Fps is (10 sec: 1229.0, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9052160. Throughput: 0: 222.6. Samples: 1879652. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:33:12,159][00307] Avg episode reward: [(0, '26.174')] -[2024-09-01 09:33:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9056256. Throughput: 0: 217.7. Samples: 1881022. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:33:17,156][00307] Avg episode reward: [(0, '26.359')] -[2024-09-01 09:33:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9060352. Throughput: 0: 216.3. Samples: 1881604. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:33:22,160][00307] Avg episode reward: [(0, '26.676')] -[2024-09-01 09:33:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9064448. Throughput: 0: 211.0. Samples: 1882904. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:33:27,159][00307] Avg episode reward: [(0, '26.965')] -[2024-09-01 09:33:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9068544. Throughput: 0: 226.8. Samples: 1884684. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:32,158][00307] Avg episode reward: [(0, '26.821')] -[2024-09-01 09:33:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9072640. Throughput: 0: 223.9. Samples: 1885094. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:37,160][00307] Avg episode reward: [(0, '26.981')] -[2024-09-01 09:33:39,170][16881] Updated weights for policy 0, policy_version 2216 (0.2218) -[2024-09-01 09:33:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9076736. Throughput: 0: 221.1. Samples: 1886496. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:42,163][00307] Avg episode reward: [(0, '27.296')] -[2024-09-01 09:33:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9084928. Throughput: 0: 207.3. Samples: 1886928. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:47,162][00307] Avg episode reward: [(0, '26.962')] -[2024-09-01 09:33:52,162][00307] Fps is (10 sec: 1227.8, 60 sec: 955.6, 300 sec: 902.5). Total num frames: 9089024. Throughput: 0: 227.8. Samples: 1888676. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:52,172][00307] Avg episode reward: [(0, '27.063')] -[2024-09-01 09:33:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9089024. Throughput: 0: 223.1. Samples: 1889690. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:33:57,158][00307] Avg episode reward: [(0, '27.671')] -[2024-09-01 09:34:01,410][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002221_9097216.pth... -[2024-09-01 09:34:01,528][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002169_8884224.pth -[2024-09-01 09:34:02,154][00307] Fps is (10 sec: 819.9, 60 sec: 955.8, 300 sec: 888.6). Total num frames: 9097216. Throughput: 0: 224.4. Samples: 1891118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:34:02,156][00307] Avg episode reward: [(0, '27.643')] -[2024-09-01 09:34:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9101312. Throughput: 0: 228.9. Samples: 1891906. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:34:07,156][00307] Avg episode reward: [(0, '27.894')] -[2024-09-01 09:34:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9105408. Throughput: 0: 226.2. Samples: 1893082. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:34:12,161][00307] Avg episode reward: [(0, '27.692')] -[2024-09-01 09:34:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9109504. Throughput: 0: 213.8. Samples: 1894304. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:17,156][00307] Avg episode reward: [(0, '27.288')] -[2024-09-01 09:34:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9113600. Throughput: 0: 220.1. Samples: 1895000. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:22,163][00307] Avg episode reward: [(0, '26.264')] -[2024-09-01 09:34:24,198][16881] Updated weights for policy 0, policy_version 2226 (0.2569) -[2024-09-01 09:34:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9117696. Throughput: 0: 222.4. Samples: 1896504. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:27,160][00307] Avg episode reward: [(0, '26.313')] -[2024-09-01 09:34:27,983][16868] Signal inference workers to stop experience collection... (1850 times) -[2024-09-01 09:34:28,063][16881] InferenceWorker_p0-w0: stopping experience collection (1850 times) -[2024-09-01 09:34:29,196][16868] Signal inference workers to resume experience collection... (1850 times) -[2024-09-01 09:34:29,197][16881] InferenceWorker_p0-w0: resuming experience collection (1850 times) -[2024-09-01 09:34:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9121792. Throughput: 0: 236.0. Samples: 1897550. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:32,161][00307] Avg episode reward: [(0, '25.812')] -[2024-09-01 09:34:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9125888. Throughput: 0: 217.2. Samples: 1898450. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:37,167][00307] Avg episode reward: [(0, '26.327')] -[2024-09-01 09:34:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9134080. Throughput: 0: 232.6. Samples: 1900158. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:42,161][00307] Avg episode reward: [(0, '26.327')] -[2024-09-01 09:34:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9134080. Throughput: 0: 223.0. Samples: 1901154. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:47,156][00307] Avg episode reward: [(0, '27.136')] -[2024-09-01 09:34:52,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 9138176. Throughput: 0: 217.4. Samples: 1901688. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:52,164][00307] Avg episode reward: [(0, '27.109')] -[2024-09-01 09:34:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9146368. Throughput: 0: 225.7. Samples: 1903238. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:34:57,157][00307] Avg episode reward: [(0, '27.900')] -[2024-09-01 09:35:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9150464. Throughput: 0: 231.8. Samples: 1904734. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:35:02,160][00307] Avg episode reward: [(0, '28.078')] -[2024-09-01 09:35:07,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9154560. Throughput: 0: 227.4. Samples: 1905234. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:35:07,170][00307] Avg episode reward: [(0, '28.263')] -[2024-09-01 09:35:10,278][16881] Updated weights for policy 0, policy_version 2236 (0.0685) -[2024-09-01 09:35:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9158656. Throughput: 0: 222.0. Samples: 1906496. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:35:12,160][00307] Avg episode reward: [(0, '28.522')] -[2024-09-01 09:35:17,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9162752. Throughput: 0: 236.5. Samples: 1908192. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:35:17,167][00307] Avg episode reward: [(0, '28.376')] -[2024-09-01 09:35:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9166848. Throughput: 0: 227.8. Samples: 1908702. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:22,161][00307] Avg episode reward: [(0, '28.806')] -[2024-09-01 09:35:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9170944. Throughput: 0: 212.3. Samples: 1909712. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:27,157][00307] Avg episode reward: [(0, '28.486')] -[2024-09-01 09:35:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9175040. Throughput: 0: 226.3. Samples: 1911336. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:32,167][00307] Avg episode reward: [(0, '28.315')] -[2024-09-01 09:35:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9183232. Throughput: 0: 237.1. Samples: 1912358. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:37,157][00307] Avg episode reward: [(0, '28.038')] -[2024-09-01 09:35:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9183232. Throughput: 0: 226.1. Samples: 1913412. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:42,157][00307] Avg episode reward: [(0, '28.157')] -[2024-09-01 09:35:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9191424. Throughput: 0: 219.8. Samples: 1914624. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:47,161][00307] Avg episode reward: [(0, '28.777')] -[2024-09-01 09:35:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9195520. Throughput: 0: 226.4. Samples: 1915422. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:35:52,165][00307] Avg episode reward: [(0, '28.872')] -[2024-09-01 09:35:54,711][16881] Updated weights for policy 0, policy_version 2246 (0.1041) -[2024-09-01 09:35:57,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9199616. Throughput: 0: 228.0. Samples: 1916758. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:35:57,159][00307] Avg episode reward: [(0, '29.072')] -[2024-09-01 09:36:00,680][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002247_9203712.pth... -[2024-09-01 09:36:00,765][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002195_8990720.pth -[2024-09-01 09:36:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9203712. Throughput: 0: 217.6. Samples: 1917986. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:36:02,156][00307] Avg episode reward: [(0, '29.072')] -[2024-09-01 09:36:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9207808. Throughput: 0: 216.7. Samples: 1918454. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:07,156][00307] Avg episode reward: [(0, '29.661')] -[2024-09-01 09:36:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9211904. Throughput: 0: 235.1. Samples: 1920290. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:12,160][00307] Avg episode reward: [(0, '30.271')] -[2024-09-01 09:36:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9216000. Throughput: 0: 222.0. Samples: 1921324. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:17,158][00307] Avg episode reward: [(0, '30.768')] -[2024-09-01 09:36:18,992][16868] Saving new best policy, reward=30.768! -[2024-09-01 09:36:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9220096. Throughput: 0: 212.2. Samples: 1921906. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:22,158][00307] Avg episode reward: [(0, '30.879')] -[2024-09-01 09:36:26,806][16868] Saving new best policy, reward=30.879! -[2024-09-01 09:36:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9228288. Throughput: 0: 225.4. Samples: 1923556. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:27,157][00307] Avg episode reward: [(0, '30.958')] -[2024-09-01 09:36:32,080][16868] Saving new best policy, reward=30.958! -[2024-09-01 09:36:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9232384. Throughput: 0: 221.8. Samples: 1924606. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:36:32,159][00307] Avg episode reward: [(0, '30.915')] -[2024-09-01 09:36:37,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9232384. Throughput: 0: 219.1. Samples: 1925280. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:36:37,166][00307] Avg episode reward: [(0, '31.182')] -[2024-09-01 09:36:41,359][16868] Saving new best policy, reward=31.182! -[2024-09-01 09:36:41,378][16881] Updated weights for policy 0, policy_version 2256 (0.1064) -[2024-09-01 09:36:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9240576. Throughput: 0: 220.7. Samples: 1926688. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:42,159][00307] Avg episode reward: [(0, '31.309')] -[2024-09-01 09:36:45,290][16868] Saving new best policy, reward=31.309! -[2024-09-01 09:36:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9244672. Throughput: 0: 226.8. Samples: 1928192. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:47,159][00307] Avg episode reward: [(0, '31.863')] -[2024-09-01 09:36:50,939][16868] Saving new best policy, reward=31.863! -[2024-09-01 09:36:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9248768. Throughput: 0: 226.8. Samples: 1928662. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:52,157][00307] Avg episode reward: [(0, '31.786')] -[2024-09-01 09:36:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9252864. Throughput: 0: 210.6. Samples: 1929766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:36:57,157][00307] Avg episode reward: [(0, '32.333')] -[2024-09-01 09:36:59,687][16868] Saving new best policy, reward=32.333! -[2024-09-01 09:37:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9256960. Throughput: 0: 230.8. Samples: 1931712. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:02,157][00307] Avg episode reward: [(0, '32.421')] -[2024-09-01 09:37:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9261056. Throughput: 0: 226.2. Samples: 1932084. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:07,166][00307] Avg episode reward: [(0, '33.071')] -[2024-09-01 09:37:09,618][16868] Saving new best policy, reward=32.421! -[2024-09-01 09:37:09,741][16868] Saving new best policy, reward=33.071! -[2024-09-01 09:37:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9265152. Throughput: 0: 212.4. Samples: 1933116. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:12,164][00307] Avg episode reward: [(0, '32.551')] -[2024-09-01 09:37:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9269248. Throughput: 0: 226.5. Samples: 1934798. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:17,167][00307] Avg episode reward: [(0, '32.682')] -[2024-09-01 09:37:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9277440. Throughput: 0: 230.9. Samples: 1935672. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:22,159][00307] Avg episode reward: [(0, '32.184')] -[2024-09-01 09:37:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9277440. Throughput: 0: 224.2. Samples: 1936776. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:27,156][00307] Avg episode reward: [(0, '32.283')] -[2024-09-01 09:37:27,955][16881] Updated weights for policy 0, policy_version 2266 (0.0063) -[2024-09-01 09:37:32,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9281536. Throughput: 0: 214.8. Samples: 1937858. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:32,157][00307] Avg episode reward: [(0, '32.411')] -[2024-09-01 09:37:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9289728. Throughput: 0: 227.1. Samples: 1938880. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:37,162][00307] Avg episode reward: [(0, '31.705')] -[2024-09-01 09:37:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9293824. Throughput: 0: 229.7. Samples: 1940104. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:42,161][00307] Avg episode reward: [(0, '31.848')] -[2024-09-01 09:37:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9297920. Throughput: 0: 206.9. Samples: 1941022. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:37:47,157][00307] Avg episode reward: [(0, '31.816')] -[2024-09-01 09:37:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9302016. Throughput: 0: 220.0. Samples: 1941986. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:37:52,157][00307] Avg episode reward: [(0, '32.003')] -[2024-09-01 09:37:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9306112. Throughput: 0: 238.7. Samples: 1943856. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:37:57,160][00307] Avg episode reward: [(0, '31.742')] -[2024-09-01 09:38:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9310208. Throughput: 0: 221.6. Samples: 1944772. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:02,156][00307] Avg episode reward: [(0, '32.309')] -[2024-09-01 09:38:04,260][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002274_9314304.pth... -[2024-09-01 09:38:04,376][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002221_9097216.pth -[2024-09-01 09:38:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9314304. Throughput: 0: 213.0. Samples: 1945258. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:38:07,163][00307] Avg episode reward: [(0, '31.921')] -[2024-09-01 09:38:12,067][16881] Updated weights for policy 0, policy_version 2276 (0.0591) -[2024-09-01 09:38:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9322496. Throughput: 0: 228.3. Samples: 1947050. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:38:12,162][00307] Avg episode reward: [(0, '31.338')] -[2024-09-01 09:38:15,018][16868] Signal inference workers to stop experience collection... (1900 times) -[2024-09-01 09:38:15,127][16881] InferenceWorker_p0-w0: stopping experience collection (1900 times) -[2024-09-01 09:38:17,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9322496. Throughput: 0: 226.8. Samples: 1948064. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:38:17,160][00307] Avg episode reward: [(0, '30.948')] -[2024-09-01 09:38:17,176][16868] Signal inference workers to resume experience collection... (1900 times) -[2024-09-01 09:38:17,177][16881] InferenceWorker_p0-w0: resuming experience collection (1900 times) -[2024-09-01 09:38:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9326592. Throughput: 0: 219.1. Samples: 1948738. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:38:22,157][00307] Avg episode reward: [(0, '30.626')] -[2024-09-01 09:38:27,154][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9334784. Throughput: 0: 224.0. Samples: 1950182. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:38:27,164][00307] Avg episode reward: [(0, '30.077')] -[2024-09-01 09:38:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9338880. Throughput: 0: 238.8. Samples: 1951770. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:32,165][00307] Avg episode reward: [(0, '29.097')] -[2024-09-01 09:38:37,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9342976. Throughput: 0: 227.7. Samples: 1952232. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:37,161][00307] Avg episode reward: [(0, '29.328')] -[2024-09-01 09:38:42,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9347072. Throughput: 0: 210.0. Samples: 1953306. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:42,163][00307] Avg episode reward: [(0, '29.218')] -[2024-09-01 09:38:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9351168. Throughput: 0: 232.3. Samples: 1955226. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:47,168][00307] Avg episode reward: [(0, '28.158')] -[2024-09-01 09:38:52,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9355264. Throughput: 0: 229.1. Samples: 1955568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:52,158][00307] Avg episode reward: [(0, '28.487')] -[2024-09-01 09:38:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9359360. Throughput: 0: 217.1. Samples: 1956820. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:38:57,161][00307] Avg episode reward: [(0, '28.684')] -[2024-09-01 09:38:59,785][16881] Updated weights for policy 0, policy_version 2286 (0.1050) -[2024-09-01 09:39:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9363456. Throughput: 0: 227.9. Samples: 1958318. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:39:02,157][00307] Avg episode reward: [(0, '28.765')] -[2024-09-01 09:39:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9371648. Throughput: 0: 231.2. Samples: 1959144. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:39:07,157][00307] Avg episode reward: [(0, '28.876')] -[2024-09-01 09:39:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9371648. Throughput: 0: 227.0. Samples: 1960398. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:39:12,157][00307] Avg episode reward: [(0, '28.570')] -[2024-09-01 09:39:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9375744. Throughput: 0: 214.6. Samples: 1961428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:39:17,156][00307] Avg episode reward: [(0, '28.004')] -[2024-09-01 09:39:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9383936. Throughput: 0: 227.0. Samples: 1962446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:39:22,160][00307] Avg episode reward: [(0, '26.989')] -[2024-09-01 09:39:27,158][00307] Fps is (10 sec: 1228.3, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9388032. Throughput: 0: 228.1. Samples: 1963570. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:39:27,162][00307] Avg episode reward: [(0, '26.998')] -[2024-09-01 09:39:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9392128. Throughput: 0: 208.8. Samples: 1964622. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:39:32,162][00307] Avg episode reward: [(0, '26.890')] -[2024-09-01 09:39:37,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9396224. Throughput: 0: 220.8. Samples: 1965504. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:39:37,162][00307] Avg episode reward: [(0, '26.829')] -[2024-09-01 09:39:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9400320. Throughput: 0: 231.8. Samples: 1967250. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:39:42,163][00307] Avg episode reward: [(0, '26.948')] -[2024-09-01 09:39:44,136][16881] Updated weights for policy 0, policy_version 2296 (0.2473) -[2024-09-01 09:39:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9404416. Throughput: 0: 220.3. Samples: 1968232. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:39:47,162][00307] Avg episode reward: [(0, '27.013')] -[2024-09-01 09:39:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9408512. Throughput: 0: 212.0. Samples: 1968686. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:39:52,156][00307] Avg episode reward: [(0, '27.255')] -[2024-09-01 09:39:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9412608. Throughput: 0: 227.1. Samples: 1970618. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:39:57,163][00307] Avg episode reward: [(0, '27.145')] -[2024-09-01 09:40:01,956][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002300_9420800.pth... -[2024-09-01 09:40:02,054][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002247_9203712.pth -[2024-09-01 09:40:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9420800. Throughput: 0: 227.3. Samples: 1971656. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:40:02,157][00307] Avg episode reward: [(0, '26.788')] -[2024-09-01 09:40:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9420800. Throughput: 0: 220.7. Samples: 1972376. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:40:07,156][00307] Avg episode reward: [(0, '26.636')] -[2024-09-01 09:40:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9428992. Throughput: 0: 224.5. Samples: 1973670. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:40:12,164][00307] Avg episode reward: [(0, '26.369')] -[2024-09-01 09:40:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9433088. Throughput: 0: 233.5. Samples: 1975130. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:40:17,165][00307] Avg episode reward: [(0, '26.261')] -[2024-09-01 09:40:22,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9437184. Throughput: 0: 226.1. Samples: 1975680. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:40:22,164][00307] Avg episode reward: [(0, '26.114')] -[2024-09-01 09:40:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9441280. Throughput: 0: 211.3. Samples: 1976760. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:40:27,167][00307] Avg episode reward: [(0, '26.516')] -[2024-09-01 09:40:30,163][16881] Updated weights for policy 0, policy_version 2306 (0.1542) -[2024-09-01 09:40:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9445376. Throughput: 0: 230.6. Samples: 1978610. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:40:32,162][00307] Avg episode reward: [(0, '26.856')] -[2024-09-01 09:40:37,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9449472. Throughput: 0: 230.8. Samples: 1979072. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:40:37,160][00307] Avg episode reward: [(0, '27.237')] -[2024-09-01 09:40:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9453568. Throughput: 0: 213.8. Samples: 1980238. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:40:42,161][00307] Avg episode reward: [(0, '28.295')] -[2024-09-01 09:40:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9457664. Throughput: 0: 223.6. Samples: 1981716. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:40:47,160][00307] Avg episode reward: [(0, '28.632')] -[2024-09-01 09:40:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9461760. Throughput: 0: 226.4. Samples: 1982564. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:40:52,160][00307] Avg episode reward: [(0, '28.174')] -[2024-09-01 09:40:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9465856. Throughput: 0: 224.9. Samples: 1983790. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:40:57,157][00307] Avg episode reward: [(0, '28.545')] -[2024-09-01 09:41:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9469952. Throughput: 0: 220.0. Samples: 1985032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:41:02,157][00307] Avg episode reward: [(0, '28.925')] -[2024-09-01 09:41:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9478144. Throughput: 0: 224.8. Samples: 1985796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:41:07,157][00307] Avg episode reward: [(0, '28.567')] -[2024-09-01 09:41:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9482240. Throughput: 0: 230.7. Samples: 1987140. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:41:12,160][00307] Avg episode reward: [(0, '28.344')] -[2024-09-01 09:41:16,319][16881] Updated weights for policy 0, policy_version 2316 (0.1580) -[2024-09-01 09:41:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9486336. Throughput: 0: 214.0. Samples: 1988238. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:41:17,161][00307] Avg episode reward: [(0, '28.394')] -[2024-09-01 09:41:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9490432. Throughput: 0: 222.0. Samples: 1989064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:41:22,163][00307] Avg episode reward: [(0, '28.648')] -[2024-09-01 09:41:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9494528. Throughput: 0: 228.2. Samples: 1990506. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:41:27,159][00307] Avg episode reward: [(0, '28.491')] -[2024-09-01 09:41:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9498624. Throughput: 0: 222.0. Samples: 1991708. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:41:32,157][00307] Avg episode reward: [(0, '28.094')] -[2024-09-01 09:41:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9502720. Throughput: 0: 213.8. Samples: 1992186. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:41:37,158][00307] Avg episode reward: [(0, '27.966')] -[2024-09-01 09:41:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9506816. Throughput: 0: 221.2. Samples: 1993746. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:41:42,162][00307] Avg episode reward: [(0, '29.551')] -[2024-09-01 09:41:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9510912. Throughput: 0: 226.8. Samples: 1995236. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:41:47,158][00307] Avg episode reward: [(0, '29.634')] -[2024-09-01 09:41:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9515008. Throughput: 0: 223.3. Samples: 1995844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:41:52,160][00307] Avg episode reward: [(0, '30.868')] -[2024-09-01 09:41:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9519104. Throughput: 0: 220.7. Samples: 1997070. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 09:41:57,165][00307] Avg episode reward: [(0, '30.333')] -[2024-09-01 09:42:01,726][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002326_9527296.pth... -[2024-09-01 09:42:01,738][16881] Updated weights for policy 0, policy_version 2326 (0.1150) -[2024-09-01 09:42:01,833][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002274_9314304.pth -[2024-09-01 09:42:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9527296. Throughput: 0: 225.9. Samples: 1998402. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:02,157][00307] Avg episode reward: [(0, '31.138')] -[2024-09-01 09:42:04,159][16868] Signal inference workers to stop experience collection... (1950 times) -[2024-09-01 09:42:04,200][16881] InferenceWorker_p0-w0: stopping experience collection (1950 times) -[2024-09-01 09:42:06,137][16868] Signal inference workers to resume experience collection... (1950 times) -[2024-09-01 09:42:06,138][16881] InferenceWorker_p0-w0: resuming experience collection (1950 times) -[2024-09-01 09:42:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9531392. Throughput: 0: 228.1. Samples: 1999330. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:07,158][00307] Avg episode reward: [(0, '30.405')] -[2024-09-01 09:42:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9531392. Throughput: 0: 218.8. Samples: 2000352. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:12,156][00307] Avg episode reward: [(0, '31.631')] -[2024-09-01 09:42:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9539584. Throughput: 0: 223.5. Samples: 2001764. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:42:17,156][00307] Avg episode reward: [(0, '30.896')] -[2024-09-01 09:42:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9543680. Throughput: 0: 226.4. Samples: 2002374. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:42:22,164][00307] Avg episode reward: [(0, '31.209')] -[2024-09-01 09:42:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9547776. Throughput: 0: 221.1. Samples: 2003694. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:42:27,158][00307] Avg episode reward: [(0, '31.361')] -[2024-09-01 09:42:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9551872. Throughput: 0: 218.3. Samples: 2005060. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:32,163][00307] Avg episode reward: [(0, '31.225')] -[2024-09-01 09:42:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9555968. Throughput: 0: 219.7. Samples: 2005730. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:37,167][00307] Avg episode reward: [(0, '31.369')] -[2024-09-01 09:42:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9560064. Throughput: 0: 225.7. Samples: 2007226. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:42:42,159][00307] Avg episode reward: [(0, '31.369')] -[2024-09-01 09:42:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9564160. Throughput: 0: 221.1. Samples: 2008352. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:42:47,160][00307] Avg episode reward: [(0, '31.296')] -[2024-09-01 09:42:48,867][16881] Updated weights for policy 0, policy_version 2336 (0.0558) -[2024-09-01 09:42:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9568256. Throughput: 0: 214.9. Samples: 2009002. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:52,157][00307] Avg episode reward: [(0, '31.038')] -[2024-09-01 09:42:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9576448. Throughput: 0: 227.2. Samples: 2010574. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:42:57,161][00307] Avg episode reward: [(0, '31.015')] -[2024-09-01 09:43:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9580544. Throughput: 0: 219.5. Samples: 2011642. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:02,158][00307] Avg episode reward: [(0, '31.706')] -[2024-09-01 09:43:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 9580544. Throughput: 0: 220.8. Samples: 2012310. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:07,164][00307] Avg episode reward: [(0, '31.525')] -[2024-09-01 09:43:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9588736. Throughput: 0: 222.2. Samples: 2013694. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:43:12,162][00307] Avg episode reward: [(0, '31.550')] -[2024-09-01 09:43:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9592832. Throughput: 0: 230.3. Samples: 2015422. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:17,160][00307] Avg episode reward: [(0, '32.063')] -[2024-09-01 09:43:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9596928. Throughput: 0: 221.8. Samples: 2015710. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:22,163][00307] Avg episode reward: [(0, '32.276')] -[2024-09-01 09:43:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9601024. Throughput: 0: 212.2. Samples: 2016774. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:27,157][00307] Avg episode reward: [(0, '32.364')] -[2024-09-01 09:43:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9605120. Throughput: 0: 231.4. Samples: 2018766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:32,166][00307] Avg episode reward: [(0, '32.253')] -[2024-09-01 09:43:33,268][16881] Updated weights for policy 0, policy_version 2346 (0.1540) -[2024-09-01 09:43:37,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9609216. Throughput: 0: 228.7. Samples: 2019296. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:37,166][00307] Avg episode reward: [(0, '32.118')] -[2024-09-01 09:43:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9613312. Throughput: 0: 216.0. Samples: 2020292. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:43:42,157][00307] Avg episode reward: [(0, '32.251')] -[2024-09-01 09:43:47,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9617408. Throughput: 0: 226.7. Samples: 2021842. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:43:47,167][00307] Avg episode reward: [(0, '32.251')] -[2024-09-01 09:43:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9625600. Throughput: 0: 235.1. Samples: 2022890. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:43:52,160][00307] Avg episode reward: [(0, '32.113')] -[2024-09-01 09:43:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9629696. Throughput: 0: 226.8. Samples: 2023902. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:43:57,157][00307] Avg episode reward: [(0, '32.315')] -[2024-09-01 09:44:01,799][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002352_9633792.pth... -[2024-09-01 09:44:01,916][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002300_9420800.pth -[2024-09-01 09:44:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9633792. Throughput: 0: 213.1. Samples: 2025012. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:02,165][00307] Avg episode reward: [(0, '32.154')] -[2024-09-01 09:44:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9637888. Throughput: 0: 227.3. Samples: 2025940. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:07,159][00307] Avg episode reward: [(0, '31.842')] -[2024-09-01 09:44:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9641984. Throughput: 0: 232.9. Samples: 2027254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:12,160][00307] Avg episode reward: [(0, '31.301')] -[2024-09-01 09:44:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9646080. Throughput: 0: 213.6. Samples: 2028376. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:17,165][00307] Avg episode reward: [(0, '31.477')] -[2024-09-01 09:44:20,581][16881] Updated weights for policy 0, policy_version 2356 (0.0553) -[2024-09-01 09:44:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9650176. Throughput: 0: 217.0. Samples: 2029062. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:44:22,162][00307] Avg episode reward: [(0, '30.766')] -[2024-09-01 09:44:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9654272. Throughput: 0: 236.8. Samples: 2030948. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:44:27,158][00307] Avg episode reward: [(0, '31.637')] -[2024-09-01 09:44:32,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9658368. Throughput: 0: 223.6. Samples: 2031906. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:44:32,163][00307] Avg episode reward: [(0, '31.637')] -[2024-09-01 09:44:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9662464. Throughput: 0: 212.3. Samples: 2032442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:44:37,166][00307] Avg episode reward: [(0, '30.648')] -[2024-09-01 09:44:42,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9666560. Throughput: 0: 227.1. Samples: 2034122. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:42,167][00307] Avg episode reward: [(0, '30.773')] -[2024-09-01 09:44:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9670656. Throughput: 0: 226.6. Samples: 2035210. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:47,157][00307] Avg episode reward: [(0, '30.935')] -[2024-09-01 09:44:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9674752. Throughput: 0: 220.3. Samples: 2035852. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:52,166][00307] Avg episode reward: [(0, '31.215')] -[2024-09-01 09:44:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9682944. Throughput: 0: 223.1. Samples: 2037292. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:44:57,158][00307] Avg episode reward: [(0, '31.047')] -[2024-09-01 09:45:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9687040. Throughput: 0: 231.9. Samples: 2038810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:02,156][00307] Avg episode reward: [(0, '30.725')] -[2024-09-01 09:45:06,066][16881] Updated weights for policy 0, policy_version 2366 (0.1061) -[2024-09-01 09:45:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9691136. Throughput: 0: 228.0. Samples: 2039322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:07,158][00307] Avg episode reward: [(0, '30.975')] -[2024-09-01 09:45:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9695232. Throughput: 0: 208.0. Samples: 2040308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:12,163][00307] Avg episode reward: [(0, '31.346')] -[2024-09-01 09:45:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9699328. Throughput: 0: 228.0. Samples: 2042164. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:17,159][00307] Avg episode reward: [(0, '30.424')] -[2024-09-01 09:45:22,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9703424. Throughput: 0: 225.2. Samples: 2042578. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:22,159][00307] Avg episode reward: [(0, '29.397')] -[2024-09-01 09:45:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9707520. Throughput: 0: 214.4. Samples: 2043772. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:27,161][00307] Avg episode reward: [(0, '29.499')] -[2024-09-01 09:45:32,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9711616. Throughput: 0: 228.1. Samples: 2045474. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:32,158][00307] Avg episode reward: [(0, '29.489')] -[2024-09-01 09:45:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9715712. Throughput: 0: 229.0. Samples: 2046156. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:45:37,164][00307] Avg episode reward: [(0, '28.377')] -[2024-09-01 09:45:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9719808. Throughput: 0: 226.3. Samples: 2047474. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:45:42,156][00307] Avg episode reward: [(0, '28.652')] -[2024-09-01 09:45:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9723904. Throughput: 0: 216.9. Samples: 2048572. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:45:47,158][00307] Avg episode reward: [(0, '28.438')] -[2024-09-01 09:45:52,092][16881] Updated weights for policy 0, policy_version 2376 (0.1565) -[2024-09-01 09:45:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9732096. Throughput: 0: 226.9. Samples: 2049534. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:45:52,164][00307] Avg episode reward: [(0, '29.055')] -[2024-09-01 09:45:54,451][16868] Signal inference workers to stop experience collection... (2000 times) -[2024-09-01 09:45:54,492][16881] InferenceWorker_p0-w0: stopping experience collection (2000 times) -[2024-09-01 09:45:55,434][16868] Signal inference workers to resume experience collection... (2000 times) -[2024-09-01 09:45:55,436][16881] InferenceWorker_p0-w0: resuming experience collection (2000 times) -[2024-09-01 09:45:57,161][00307] Fps is (10 sec: 1227.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9736192. Throughput: 0: 229.9. Samples: 2050656. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:45:57,165][00307] Avg episode reward: [(0, '28.648')] -[2024-09-01 09:46:01,310][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002378_9740288.pth... -[2024-09-01 09:46:01,479][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002326_9527296.pth -[2024-09-01 09:46:02,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9740288. Throughput: 0: 214.3. Samples: 2051810. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:46:02,162][00307] Avg episode reward: [(0, '29.294')] -[2024-09-01 09:46:07,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9744384. Throughput: 0: 224.4. Samples: 2052674. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:46:07,157][00307] Avg episode reward: [(0, '30.053')] -[2024-09-01 09:46:12,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9748480. Throughput: 0: 229.4. Samples: 2054094. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:12,163][00307] Avg episode reward: [(0, '29.708')] -[2024-09-01 09:46:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9752576. Throughput: 0: 220.1. Samples: 2055378. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:17,164][00307] Avg episode reward: [(0, '29.974')] -[2024-09-01 09:46:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9756672. Throughput: 0: 213.2. Samples: 2055750. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:22,161][00307] Avg episode reward: [(0, '29.630')] -[2024-09-01 09:46:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9760768. Throughput: 0: 218.7. Samples: 2057314. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:27,157][00307] Avg episode reward: [(0, '30.400')] -[2024-09-01 09:46:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9764864. Throughput: 0: 227.0. Samples: 2058788. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:32,156][00307] Avg episode reward: [(0, '30.054')] -[2024-09-01 09:46:37,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9768960. Throughput: 0: 212.5. Samples: 2059098. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:37,163][00307] Avg episode reward: [(0, '30.483')] -[2024-09-01 09:46:39,080][16881] Updated weights for policy 0, policy_version 2386 (0.1830) -[2024-09-01 09:46:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9773056. Throughput: 0: 221.7. Samples: 2060630. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:46:42,167][00307] Avg episode reward: [(0, '30.483')] -[2024-09-01 09:46:47,154][00307] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9781248. Throughput: 0: 225.7. Samples: 2061964. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:47,165][00307] Avg episode reward: [(0, '28.690')] -[2024-09-01 09:46:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9785344. Throughput: 0: 228.5. Samples: 2062956. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:52,159][00307] Avg episode reward: [(0, '28.534')] -[2024-09-01 09:46:57,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.3, 300 sec: 874.7). Total num frames: 9785344. Throughput: 0: 218.1. Samples: 2063910. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:46:57,158][00307] Avg episode reward: [(0, '28.939')] -[2024-09-01 09:47:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9793536. Throughput: 0: 221.2. Samples: 2065330. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:47:02,157][00307] Avg episode reward: [(0, '28.959')] -[2024-09-01 09:47:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9797632. Throughput: 0: 227.2. Samples: 2065974. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:47:07,162][00307] Avg episode reward: [(0, '28.819')] -[2024-09-01 09:47:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9801728. Throughput: 0: 217.9. Samples: 2067120. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:47:12,157][00307] Avg episode reward: [(0, '28.628')] -[2024-09-01 09:47:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9805824. Throughput: 0: 218.3. Samples: 2068610. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:47:17,157][00307] Avg episode reward: [(0, '28.707')] -[2024-09-01 09:47:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9809920. Throughput: 0: 221.3. Samples: 2069056. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:47:22,161][00307] Avg episode reward: [(0, '29.374')] -[2024-09-01 09:47:23,895][16881] Updated weights for policy 0, policy_version 2396 (0.1052) -[2024-09-01 09:47:27,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9814016. Throughput: 0: 222.7. Samples: 2070654. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:47:27,170][00307] Avg episode reward: [(0, '29.534')] -[2024-09-01 09:47:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9818112. Throughput: 0: 218.9. Samples: 2071816. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:47:32,157][00307] Avg episode reward: [(0, '29.396')] -[2024-09-01 09:47:37,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9822208. Throughput: 0: 212.8. Samples: 2072532. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:47:37,164][00307] Avg episode reward: [(0, '29.735')] -[2024-09-01 09:47:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9830400. Throughput: 0: 226.8. Samples: 2074114. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:47:42,157][00307] Avg episode reward: [(0, '29.915')] -[2024-09-01 09:47:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9830400. Throughput: 0: 219.0. Samples: 2075186. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:47:47,162][00307] Avg episode reward: [(0, '29.370')] -[2024-09-01 09:47:52,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 9834496. Throughput: 0: 218.0. Samples: 2075782. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:47:52,166][00307] Avg episode reward: [(0, '28.666')] -[2024-09-01 09:47:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 9842688. Throughput: 0: 224.7. Samples: 2077230. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:47:57,158][00307] Avg episode reward: [(0, '29.006')] -[2024-09-01 09:47:59,781][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002404_9846784.pth... -[2024-09-01 09:47:59,883][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002352_9633792.pth -[2024-09-01 09:48:02,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9846784. Throughput: 0: 227.0. Samples: 2078826. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:48:02,162][00307] Avg episode reward: [(0, '29.716')] -[2024-09-01 09:48:07,161][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9850880. Throughput: 0: 227.5. Samples: 2079296. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:48:07,164][00307] Avg episode reward: [(0, '29.609')] -[2024-09-01 09:48:10,608][16881] Updated weights for policy 0, policy_version 2406 (0.0600) -[2024-09-01 09:48:12,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9854976. Throughput: 0: 219.0. Samples: 2080508. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:48:12,158][00307] Avg episode reward: [(0, '29.504')] -[2024-09-01 09:48:17,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9859072. Throughput: 0: 234.2. Samples: 2082356. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:48:17,163][00307] Avg episode reward: [(0, '29.709')] -[2024-09-01 09:48:22,188][00307] Fps is (10 sec: 816.4, 60 sec: 887.0, 300 sec: 888.5). Total num frames: 9863168. Throughput: 0: 227.7. Samples: 2082786. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:48:22,200][00307] Avg episode reward: [(0, '29.597')] -[2024-09-01 09:48:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9867264. Throughput: 0: 215.7. Samples: 2083822. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:48:27,158][00307] Avg episode reward: [(0, '29.666')] -[2024-09-01 09:48:32,154][00307] Fps is (10 sec: 822.0, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9871360. Throughput: 0: 226.0. Samples: 2085354. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:48:32,165][00307] Avg episode reward: [(0, '29.872')] -[2024-09-01 09:48:37,155][00307] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9879552. Throughput: 0: 230.4. Samples: 2086148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:48:37,162][00307] Avg episode reward: [(0, '30.369')] -[2024-09-01 09:48:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9879552. Throughput: 0: 226.0. Samples: 2087398. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:48:42,160][00307] Avg episode reward: [(0, '30.369')] -[2024-09-01 09:48:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 9887744. Throughput: 0: 215.2. Samples: 2088508. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:48:47,163][00307] Avg episode reward: [(0, '29.571')] -[2024-09-01 09:48:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 9891840. Throughput: 0: 226.7. Samples: 2089498. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:48:52,156][00307] Avg episode reward: [(0, '29.224')] -[2024-09-01 09:48:55,298][16881] Updated weights for policy 0, policy_version 2416 (0.1555) -[2024-09-01 09:48:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9895936. Throughput: 0: 226.8. Samples: 2090714. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:48:57,167][00307] Avg episode reward: [(0, '29.059')] -[2024-09-01 09:49:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9900032. Throughput: 0: 211.6. Samples: 2091880. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:49:02,156][00307] Avg episode reward: [(0, '30.080')] -[2024-09-01 09:49:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 9904128. Throughput: 0: 218.3. Samples: 2092600. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:49:07,161][00307] Avg episode reward: [(0, '30.193')] -[2024-09-01 09:49:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9908224. Throughput: 0: 234.7. Samples: 2094382. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:49:12,156][00307] Avg episode reward: [(0, '29.618')] -[2024-09-01 09:49:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9912320. Throughput: 0: 222.2. Samples: 2095354. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:17,158][00307] Avg episode reward: [(0, '29.152')] -[2024-09-01 09:49:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 888.0, 300 sec: 888.6). Total num frames: 9916416. Throughput: 0: 214.2. Samples: 2095786. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:22,159][00307] Avg episode reward: [(0, '29.597')] -[2024-09-01 09:49:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9920512. Throughput: 0: 226.4. Samples: 2097584. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:27,156][00307] Avg episode reward: [(0, '29.082')] -[2024-09-01 09:49:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9924608. Throughput: 0: 227.9. Samples: 2098764. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:32,156][00307] Avg episode reward: [(0, '28.874')] -[2024-09-01 09:49:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9928704. Throughput: 0: 217.4. Samples: 2099282. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:37,163][00307] Avg episode reward: [(0, '28.527')] -[2024-09-01 09:49:41,990][16881] Updated weights for policy 0, policy_version 2426 (0.2090) -[2024-09-01 09:49:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9936896. Throughput: 0: 221.3. Samples: 2100674. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:42,158][00307] Avg episode reward: [(0, '28.903')] -[2024-09-01 09:49:44,449][16868] Signal inference workers to stop experience collection... (2050 times) -[2024-09-01 09:49:44,507][16881] InferenceWorker_p0-w0: stopping experience collection (2050 times) -[2024-09-01 09:49:45,951][16868] Signal inference workers to resume experience collection... (2050 times) -[2024-09-01 09:49:45,953][16881] InferenceWorker_p0-w0: resuming experience collection (2050 times) -[2024-09-01 09:49:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9940992. Throughput: 0: 223.0. Samples: 2101916. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:49:47,164][00307] Avg episode reward: [(0, '28.505')] -[2024-09-01 09:49:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9945088. Throughput: 0: 223.4. Samples: 2102654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:49:52,157][00307] Avg episode reward: [(0, '28.188')] -[2024-09-01 09:49:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9949184. Throughput: 0: 208.3. Samples: 2103754. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:49:57,161][00307] Avg episode reward: [(0, '27.720')] -[2024-09-01 09:50:00,426][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002430_9953280.pth... -[2024-09-01 09:50:00,546][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002378_9740288.pth -[2024-09-01 09:50:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9953280. Throughput: 0: 224.3. Samples: 2105448. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:02,166][00307] Avg episode reward: [(0, '29.161')] -[2024-09-01 09:50:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9957376. Throughput: 0: 223.9. Samples: 2105860. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:07,160][00307] Avg episode reward: [(0, '29.335')] -[2024-09-01 09:50:12,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9961472. Throughput: 0: 209.7. Samples: 2107020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:50:12,159][00307] Avg episode reward: [(0, '29.952')] -[2024-09-01 09:50:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9965568. Throughput: 0: 217.9. Samples: 2108568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:50:17,158][00307] Avg episode reward: [(0, '29.554')] -[2024-09-01 09:50:22,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9969664. Throughput: 0: 220.7. Samples: 2109212. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:22,156][00307] Avg episode reward: [(0, '29.797')] -[2024-09-01 09:50:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9973760. Throughput: 0: 222.2. Samples: 2110672. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:27,157][00307] Avg episode reward: [(0, '29.202')] -[2024-09-01 09:50:28,818][16881] Updated weights for policy 0, policy_version 2436 (0.1091) -[2024-09-01 09:50:32,159][00307] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9977856. Throughput: 0: 214.1. Samples: 2111550. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:32,161][00307] Avg episode reward: [(0, '29.012')] -[2024-09-01 09:50:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9981952. Throughput: 0: 207.7. Samples: 2112002. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:37,157][00307] Avg episode reward: [(0, '29.106')] -[2024-09-01 09:50:42,154][00307] Fps is (10 sec: 819.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9986048. Throughput: 0: 214.8. Samples: 2113418. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:42,162][00307] Avg episode reward: [(0, '29.330')] -[2024-09-01 09:50:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 9990144. Throughput: 0: 200.0. Samples: 2114448. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:50:47,159][00307] Avg episode reward: [(0, '29.037')] -[2024-09-01 09:50:52,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 874.8). Total num frames: 9994240. Throughput: 0: 201.0. Samples: 2114904. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:50:52,161][00307] Avg episode reward: [(0, '28.844')] -[2024-09-01 09:50:57,156][00307] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 9998336. Throughput: 0: 199.6. Samples: 2116002. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:50:57,162][00307] Avg episode reward: [(0, '28.755')] -[2024-09-01 09:51:02,154][00307] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 10002432. Throughput: 0: 194.0. Samples: 2117296. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:51:02,163][00307] Avg episode reward: [(0, '28.459')] -[2024-09-01 09:51:07,154][00307] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 10006528. Throughput: 0: 196.2. Samples: 2118040. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:51:07,158][00307] Avg episode reward: [(0, '29.604')] -[2024-09-01 09:51:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 751.0, 300 sec: 860.9). Total num frames: 10006528. Throughput: 0: 186.5. Samples: 2119066. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 09:51:12,160][00307] Avg episode reward: [(0, '29.443')] -[2024-09-01 09:51:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 10010624. Throughput: 0: 191.1. Samples: 2120150. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:17,158][00307] Avg episode reward: [(0, '29.822')] -[2024-09-01 09:51:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 10014720. Throughput: 0: 194.8. Samples: 2120770. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:22,161][00307] Avg episode reward: [(0, '30.166')] -[2024-09-01 09:51:22,236][16881] Updated weights for policy 0, policy_version 2446 (0.2293) -[2024-09-01 09:51:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 860.9). Total num frames: 10018816. Throughput: 0: 192.7. Samples: 2122088. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:27,157][00307] Avg episode reward: [(0, '29.998')] -[2024-09-01 09:51:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 860.9). Total num frames: 10022912. Throughput: 0: 189.8. Samples: 2122988. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:32,157][00307] Avg episode reward: [(0, '30.476')] -[2024-09-01 09:51:37,156][00307] Fps is (10 sec: 819.0, 60 sec: 750.9, 300 sec: 860.8). Total num frames: 10027008. Throughput: 0: 188.0. Samples: 2123364. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:37,159][00307] Avg episode reward: [(0, '30.442')] -[2024-09-01 09:51:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 10031104. Throughput: 0: 194.3. Samples: 2124744. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:42,159][00307] Avg episode reward: [(0, '30.036')] -[2024-09-01 09:51:47,155][00307] Fps is (10 sec: 819.3, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 10035200. Throughput: 0: 192.0. Samples: 2125934. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:47,158][00307] Avg episode reward: [(0, '29.972')] -[2024-09-01 09:51:52,159][00307] Fps is (10 sec: 818.8, 60 sec: 750.9, 300 sec: 860.8). Total num frames: 10039296. Throughput: 0: 183.7. Samples: 2126308. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:52,164][00307] Avg episode reward: [(0, '30.266')] -[2024-09-01 09:51:57,156][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 10043392. Throughput: 0: 185.3. Samples: 2127406. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:51:57,159][00307] Avg episode reward: [(0, '30.173')] -[2024-09-01 09:52:01,121][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002453_10047488.pth... -[2024-09-01 09:52:01,256][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002404_9846784.pth -[2024-09-01 09:52:02,154][00307] Fps is (10 sec: 819.6, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 10047488. Throughput: 0: 193.7. Samples: 2128868. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 09:52:02,159][00307] Avg episode reward: [(0, '31.046')] -[2024-09-01 09:52:07,155][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 847.0). Total num frames: 10051584. Throughput: 0: 196.3. Samples: 2129604. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:52:07,158][00307] Avg episode reward: [(0, '31.521')] -[2024-09-01 09:52:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10051584. Throughput: 0: 189.1. Samples: 2130596. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:52:12,156][00307] Avg episode reward: [(0, '32.112')] -[2024-09-01 09:52:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10055680. Throughput: 0: 192.6. Samples: 2131656. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:52:17,162][00307] Avg episode reward: [(0, '32.498')] -[2024-09-01 09:52:18,219][16881] Updated weights for policy 0, policy_version 2456 (0.1142) -[2024-09-01 09:52:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10059776. Throughput: 0: 198.2. Samples: 2132282. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:52:22,164][00307] Avg episode reward: [(0, '32.235')] -[2024-09-01 09:52:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10063872. Throughput: 0: 196.7. Samples: 2133594. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:52:27,157][00307] Avg episode reward: [(0, '32.227')] -[2024-09-01 09:52:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10067968. Throughput: 0: 190.3. Samples: 2134496. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:52:32,158][00307] Avg episode reward: [(0, '32.227')] -[2024-09-01 09:52:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 10072064. Throughput: 0: 188.7. Samples: 2134800. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:52:37,156][00307] Avg episode reward: [(0, '32.174')] -[2024-09-01 09:52:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10076160. Throughput: 0: 198.8. Samples: 2136352. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:52:42,156][00307] Avg episode reward: [(0, '32.508')] -[2024-09-01 09:52:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 833.1). Total num frames: 10080256. Throughput: 0: 191.5. Samples: 2137486. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:52:47,162][00307] Avg episode reward: [(0, '32.958')] -[2024-09-01 09:52:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 10084352. Throughput: 0: 181.1. Samples: 2137752. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:52:52,167][00307] Avg episode reward: [(0, '32.668')] -[2024-09-01 09:52:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 819.2). Total num frames: 10088448. Throughput: 0: 181.5. Samples: 2138762. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) -[2024-09-01 09:52:57,163][00307] Avg episode reward: [(0, '32.806')] -[2024-09-01 09:53:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 10092544. Throughput: 0: 190.5. Samples: 2140230. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:02,165][00307] Avg episode reward: [(0, '32.081')] -[2024-09-01 09:53:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 819.2). Total num frames: 10096640. Throughput: 0: 189.6. Samples: 2140814. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:07,159][00307] Avg episode reward: [(0, '32.160')] -[2024-09-01 09:53:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 10096640. Throughput: 0: 184.5. Samples: 2141898. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:12,159][00307] Avg episode reward: [(0, '32.191')] -[2024-09-01 09:53:12,865][16881] Updated weights for policy 0, policy_version 2466 (0.1183) -[2024-09-01 09:53:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 805.4). Total num frames: 10100736. Throughput: 0: 186.6. Samples: 2142894. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:17,158][00307] Avg episode reward: [(0, '32.459')] -[2024-09-01 09:53:22,155][00307] Fps is (10 sec: 1228.7, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 10108928. Throughput: 0: 197.5. Samples: 2143688. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:22,161][00307] Avg episode reward: [(0, '31.752')] -[2024-09-01 09:53:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 10108928. Throughput: 0: 189.8. Samples: 2144894. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:27,160][00307] Avg episode reward: [(0, '31.735')] -[2024-09-01 09:53:32,160][00307] Fps is (10 sec: 409.4, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10113024. Throughput: 0: 185.5. Samples: 2145834. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:32,164][00307] Avg episode reward: [(0, '31.371')] -[2024-09-01 09:53:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 805.3). Total num frames: 10117120. Throughput: 0: 186.6. Samples: 2146148. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:37,157][00307] Avg episode reward: [(0, '31.713')] -[2024-09-01 09:53:42,154][00307] Fps is (10 sec: 819.7, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10121216. Throughput: 0: 196.2. Samples: 2147592. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:42,168][00307] Avg episode reward: [(0, '31.898')] -[2024-09-01 09:53:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10125312. Throughput: 0: 191.8. Samples: 2148862. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:47,157][00307] Avg episode reward: [(0, '31.422')] -[2024-09-01 09:53:52,159][00307] Fps is (10 sec: 818.8, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10129408. Throughput: 0: 184.6. Samples: 2149122. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) -[2024-09-01 09:53:52,162][00307] Avg episode reward: [(0, '31.177')] -[2024-09-01 09:53:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10133504. Throughput: 0: 186.6. Samples: 2150294. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:53:57,162][00307] Avg episode reward: [(0, '30.478')] -[2024-09-01 09:53:59,752][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002475_10137600.pth... -[2024-09-01 09:53:59,883][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002430_9953280.pth -[2024-09-01 09:54:02,154][00307] Fps is (10 sec: 819.6, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10137600. Throughput: 0: 197.1. Samples: 2151764. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:02,162][00307] Avg episode reward: [(0, '30.327')] -[2024-09-01 09:54:04,826][16881] Updated weights for policy 0, policy_version 2476 (0.1705) -[2024-09-01 09:54:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10141696. Throughput: 0: 186.4. Samples: 2152076. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:54:07,163][00307] Avg episode reward: [(0, '29.959')] -[2024-09-01 09:54:09,732][16868] Signal inference workers to stop experience collection... (2100 times) -[2024-09-01 09:54:09,854][16881] InferenceWorker_p0-w0: stopping experience collection (2100 times) -[2024-09-01 09:54:11,306][16868] Signal inference workers to resume experience collection... (2100 times) -[2024-09-01 09:54:11,308][16881] InferenceWorker_p0-w0: resuming experience collection (2100 times) -[2024-09-01 09:54:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 10145792. Throughput: 0: 182.0. Samples: 2153086. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:54:12,159][00307] Avg episode reward: [(0, '30.433')] -[2024-09-01 09:54:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 10149888. Throughput: 0: 185.0. Samples: 2154158. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:17,162][00307] Avg episode reward: [(0, '29.970')] -[2024-09-01 09:54:22,159][00307] Fps is (10 sec: 818.8, 60 sec: 750.9, 300 sec: 791.4). Total num frames: 10153984. Throughput: 0: 199.1. Samples: 2155110. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:22,164][00307] Avg episode reward: [(0, '29.876')] -[2024-09-01 09:54:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 791.4). Total num frames: 10158080. Throughput: 0: 190.3. Samples: 2156154. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:27,161][00307] Avg episode reward: [(0, '30.113')] -[2024-09-01 09:54:32,154][00307] Fps is (10 sec: 409.8, 60 sec: 751.0, 300 sec: 777.5). Total num frames: 10158080. Throughput: 0: 183.8. Samples: 2157132. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:32,157][00307] Avg episode reward: [(0, '30.591')] -[2024-09-01 09:54:37,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10162176. Throughput: 0: 190.2. Samples: 2157678. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:37,156][00307] Avg episode reward: [(0, '30.631')] -[2024-09-01 09:54:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10166272. Throughput: 0: 196.0. Samples: 2159112. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:42,163][00307] Avg episode reward: [(0, '30.757')] -[2024-09-01 09:54:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10170368. Throughput: 0: 182.6. Samples: 2159980. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:54:47,156][00307] Avg episode reward: [(0, '29.317')] -[2024-09-01 09:54:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 751.0, 300 sec: 763.7). Total num frames: 10174464. Throughput: 0: 184.0. Samples: 2160354. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:54:52,157][00307] Avg episode reward: [(0, '29.317')] -[2024-09-01 09:54:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10178560. Throughput: 0: 184.6. Samples: 2161394. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:54:57,158][00307] Avg episode reward: [(0, '28.748')] -[2024-09-01 09:54:59,792][16881] Updated weights for policy 0, policy_version 2486 (0.1809) -[2024-09-01 09:55:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10182656. Throughput: 0: 195.3. Samples: 2162948. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:55:02,169][00307] Avg episode reward: [(0, '29.105')] -[2024-09-01 09:55:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10186752. Throughput: 0: 182.1. Samples: 2163304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:55:07,166][00307] Avg episode reward: [(0, '28.790')] -[2024-09-01 09:55:12,157][00307] Fps is (10 sec: 818.9, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10190848. Throughput: 0: 179.9. Samples: 2164250. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:55:12,167][00307] Avg episode reward: [(0, '28.934')] -[2024-09-01 09:55:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10194944. Throughput: 0: 181.6. Samples: 2165302. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:55:17,164][00307] Avg episode reward: [(0, '28.785')] -[2024-09-01 09:55:22,154][00307] Fps is (10 sec: 819.5, 60 sec: 751.0, 300 sec: 763.7). Total num frames: 10199040. Throughput: 0: 190.6. Samples: 2166254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:55:22,159][00307] Avg episode reward: [(0, '28.658')] -[2024-09-01 09:55:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 750.9, 300 sec: 763.7). Total num frames: 10203136. Throughput: 0: 181.3. Samples: 2167270. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:55:27,156][00307] Avg episode reward: [(0, '29.267')] -[2024-09-01 09:55:32,154][00307] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 749.8). Total num frames: 10203136. Throughput: 0: 185.7. Samples: 2168338. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:55:32,159][00307] Avg episode reward: [(0, '29.080')] -[2024-09-01 09:55:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 763.7). Total num frames: 10211328. Throughput: 0: 199.3. Samples: 2169322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:55:37,157][00307] Avg episode reward: [(0, '28.491')] -[2024-09-01 09:55:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 763.7). Total num frames: 10215424. Throughput: 0: 205.0. Samples: 2170620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:55:42,156][00307] Avg episode reward: [(0, '28.519')] -[2024-09-01 09:55:47,158][00307] Fps is (10 sec: 818.9, 60 sec: 819.1, 300 sec: 763.7). Total num frames: 10219520. Throughput: 0: 198.2. Samples: 2171870. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:55:47,164][00307] Avg episode reward: [(0, '28.519')] -[2024-09-01 09:55:51,268][16881] Updated weights for policy 0, policy_version 2496 (0.0566) -[2024-09-01 09:55:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 763.7). Total num frames: 10223616. Throughput: 0: 202.3. Samples: 2172408. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:55:52,162][00307] Avg episode reward: [(0, '28.152')] -[2024-09-01 09:55:57,154][00307] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 763.7). Total num frames: 10227712. Throughput: 0: 216.5. Samples: 2173992. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 09:55:57,165][00307] Avg episode reward: [(0, '28.029')] -[2024-09-01 09:56:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 763.7). Total num frames: 10231808. Throughput: 0: 227.2. Samples: 2175524. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:56:02,156][00307] Avg episode reward: [(0, '28.114')] -[2024-09-01 09:56:03,518][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002499_10235904.pth... -[2024-09-01 09:56:03,632][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002453_10047488.pth -[2024-09-01 09:56:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 777.5). Total num frames: 10235904. Throughput: 0: 214.2. Samples: 2175892. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:56:07,163][00307] Avg episode reward: [(0, '28.062')] -[2024-09-01 09:56:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 777.5). Total num frames: 10240000. Throughput: 0: 224.2. Samples: 2177360. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:56:12,157][00307] Avg episode reward: [(0, '28.159')] -[2024-09-01 09:56:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 791.4). Total num frames: 10248192. Throughput: 0: 228.9. Samples: 2178638. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:56:17,163][00307] Avg episode reward: [(0, '28.870')] -[2024-09-01 09:56:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 777.5). Total num frames: 10248192. Throughput: 0: 224.2. Samples: 2179410. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:56:22,163][00307] Avg episode reward: [(0, '28.778')] -[2024-09-01 09:56:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 777.5). Total num frames: 10252288. Throughput: 0: 221.6. Samples: 2180594. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:56:27,167][00307] Avg episode reward: [(0, '28.398')] -[2024-09-01 09:56:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 791.4). Total num frames: 10260480. Throughput: 0: 222.9. Samples: 2181900. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:56:32,162][00307] Avg episode reward: [(0, '28.463')] -[2024-09-01 09:56:35,421][16881] Updated weights for policy 0, policy_version 2506 (0.1089) -[2024-09-01 09:56:37,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 791.4). Total num frames: 10264576. Throughput: 0: 229.5. Samples: 2182736. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:56:37,163][00307] Avg episode reward: [(0, '28.417')] -[2024-09-01 09:56:42,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 791.4). Total num frames: 10268672. Throughput: 0: 219.6. Samples: 2183876. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:56:42,163][00307] Avg episode reward: [(0, '28.178')] -[2024-09-01 09:56:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 791.4). Total num frames: 10272768. Throughput: 0: 214.2. Samples: 2185164. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:56:47,157][00307] Avg episode reward: [(0, '27.688')] -[2024-09-01 09:56:52,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 791.4). Total num frames: 10276864. Throughput: 0: 221.1. Samples: 2185840. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:56:52,156][00307] Avg episode reward: [(0, '27.719')] -[2024-09-01 09:56:57,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 791.4). Total num frames: 10280960. Throughput: 0: 223.1. Samples: 2187398. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:56:57,164][00307] Avg episode reward: [(0, '27.890')] -[2024-09-01 09:57:02,162][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 791.4). Total num frames: 10285056. Throughput: 0: 216.6. Samples: 2188386. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:02,166][00307] Avg episode reward: [(0, '28.100')] -[2024-09-01 09:57:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 10289152. Throughput: 0: 216.9. Samples: 2189170. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:07,163][00307] Avg episode reward: [(0, '27.844')] -[2024-09-01 09:57:12,154][00307] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 10293248. Throughput: 0: 228.7. Samples: 2190886. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:12,163][00307] Avg episode reward: [(0, '27.771')] -[2024-09-01 09:57:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 805.3). Total num frames: 10297344. Throughput: 0: 223.9. Samples: 2191974. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:57:17,156][00307] Avg episode reward: [(0, '27.661')] -[2024-09-01 09:57:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 805.3). Total num frames: 10301440. Throughput: 0: 216.4. Samples: 2192474. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:57:22,166][00307] Avg episode reward: [(0, '27.386')] -[2024-09-01 09:57:22,741][16881] Updated weights for policy 0, policy_version 2516 (0.1710) -[2024-09-01 09:57:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 10309632. Throughput: 0: 225.9. Samples: 2194038. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:57:27,161][00307] Avg episode reward: [(0, '27.613')] -[2024-09-01 09:57:32,158][00307] Fps is (10 sec: 1228.3, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 10313728. Throughput: 0: 225.4. Samples: 2195308. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:32,164][00307] Avg episode reward: [(0, '28.127')] -[2024-09-01 09:57:37,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 10317824. Throughput: 0: 225.7. Samples: 2195996. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:37,162][00307] Avg episode reward: [(0, '28.195')] -[2024-09-01 09:57:42,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 10321920. Throughput: 0: 215.3. Samples: 2197088. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:42,157][00307] Avg episode reward: [(0, '27.796')] -[2024-09-01 09:57:47,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 10326016. Throughput: 0: 230.5. Samples: 2198756. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:57:47,157][00307] Avg episode reward: [(0, '28.266')] -[2024-09-01 09:57:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 10330112. Throughput: 0: 226.0. Samples: 2199342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:57:52,158][00307] Avg episode reward: [(0, '28.482')] -[2024-09-01 09:57:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 10334208. Throughput: 0: 210.8. Samples: 2200370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:57:57,156][00307] Avg episode reward: [(0, '28.345')] -[2024-09-01 09:57:59,288][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002524_10338304.pth... -[2024-09-01 09:57:59,407][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002475_10137600.pth -[2024-09-01 09:58:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 819.2). Total num frames: 10338304. Throughput: 0: 227.9. Samples: 2202228. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:58:02,164][00307] Avg episode reward: [(0, '28.943')] -[2024-09-01 09:58:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 10342400. Throughput: 0: 233.1. Samples: 2202964. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:58:07,157][00307] Avg episode reward: [(0, '29.445')] -[2024-09-01 09:58:07,555][16881] Updated weights for policy 0, policy_version 2526 (0.1637) -[2024-09-01 09:58:11,550][16868] Signal inference workers to stop experience collection... (2150 times) -[2024-09-01 09:58:11,612][16881] InferenceWorker_p0-w0: stopping experience collection (2150 times) -[2024-09-01 09:58:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 833.1). Total num frames: 10346496. Throughput: 0: 223.1. Samples: 2204078. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:58:12,157][00307] Avg episode reward: [(0, '29.430')] -[2024-09-01 09:58:13,495][16868] Signal inference workers to resume experience collection... (2150 times) -[2024-09-01 09:58:13,497][16881] InferenceWorker_p0-w0: resuming experience collection (2150 times) -[2024-09-01 09:58:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 10350592. Throughput: 0: 221.2. Samples: 2205260. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:58:17,162][00307] Avg episode reward: [(0, '29.348')] -[2024-09-01 09:58:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 847.0). Total num frames: 10358784. Throughput: 0: 224.3. Samples: 2206090. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:58:22,157][00307] Avg episode reward: [(0, '29.804')] -[2024-09-01 09:58:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10362880. Throughput: 0: 226.4. Samples: 2207274. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:58:27,157][00307] Avg episode reward: [(0, '29.299')] -[2024-09-01 09:58:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10366976. Throughput: 0: 215.2. Samples: 2208440. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:58:32,162][00307] Avg episode reward: [(0, '29.799')] -[2024-09-01 09:58:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10371072. Throughput: 0: 222.7. Samples: 2209364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:58:37,156][00307] Avg episode reward: [(0, '29.930')] -[2024-09-01 09:58:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10375168. Throughput: 0: 235.6. Samples: 2210972. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:58:42,161][00307] Avg episode reward: [(0, '30.017')] -[2024-09-01 09:58:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10379264. Throughput: 0: 215.2. Samples: 2211910. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:58:47,158][00307] Avg episode reward: [(0, '30.017')] -[2024-09-01 09:58:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10383360. Throughput: 0: 211.5. Samples: 2212480. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:58:52,158][00307] Avg episode reward: [(0, '29.852')] -[2024-09-01 09:58:54,038][16881] Updated weights for policy 0, policy_version 2536 (0.1122) -[2024-09-01 09:58:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10387456. Throughput: 0: 227.6. Samples: 2214322. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:58:57,165][00307] Avg episode reward: [(0, '29.848')] -[2024-09-01 09:59:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10391552. Throughput: 0: 227.0. Samples: 2215474. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:59:02,159][00307] Avg episode reward: [(0, '30.857')] -[2024-09-01 09:59:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 847.0). Total num frames: 10395648. Throughput: 0: 218.8. Samples: 2215936. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:59:07,157][00307] Avg episode reward: [(0, '30.787')] -[2024-09-01 09:59:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 860.9). Total num frames: 10403840. Throughput: 0: 229.9. Samples: 2217620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:59:12,156][00307] Avg episode reward: [(0, '30.297')] -[2024-09-01 09:59:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 860.9). Total num frames: 10407936. Throughput: 0: 234.8. Samples: 2219008. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 09:59:17,156][00307] Avg episode reward: [(0, '30.283')] -[2024-09-01 09:59:22,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 860.8). Total num frames: 10412032. Throughput: 0: 229.1. Samples: 2219674. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:59:22,165][00307] Avg episode reward: [(0, '29.978')] -[2024-09-01 09:59:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10416128. Throughput: 0: 215.4. Samples: 2220664. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:59:27,163][00307] Avg episode reward: [(0, '31.204')] -[2024-09-01 09:59:32,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10420224. Throughput: 0: 232.7. Samples: 2222382. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:59:32,161][00307] Avg episode reward: [(0, '31.600')] -[2024-09-01 09:59:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10424320. Throughput: 0: 229.0. Samples: 2222786. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:59:37,157][00307] Avg episode reward: [(0, '31.487')] -[2024-09-01 09:59:39,951][16881] Updated weights for policy 0, policy_version 2546 (0.1097) -[2024-09-01 09:59:42,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10428416. Throughput: 0: 214.0. Samples: 2223950. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:59:42,164][00307] Avg episode reward: [(0, '32.515')] -[2024-09-01 09:59:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10432512. Throughput: 0: 220.1. Samples: 2225378. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 09:59:47,157][00307] Avg episode reward: [(0, '32.473')] -[2024-09-01 09:59:52,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10436608. Throughput: 0: 228.3. Samples: 2226210. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:59:52,156][00307] Avg episode reward: [(0, '32.147')] -[2024-09-01 09:59:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10440704. Throughput: 0: 221.0. Samples: 2227564. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 09:59:57,156][00307] Avg episode reward: [(0, '32.161')] -[2024-09-01 10:00:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10444800. Throughput: 0: 217.2. Samples: 2228780. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:00:02,165][00307] Avg episode reward: [(0, '32.285')] -[2024-09-01 10:00:03,135][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002551_10448896.pth... -[2024-09-01 10:00:03,257][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002499_10235904.pth -[2024-09-01 10:00:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 10448896. Throughput: 0: 219.7. Samples: 2229558. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:00:07,166][00307] Avg episode reward: [(0, '31.543')] -[2024-09-01 10:00:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10457088. Throughput: 0: 227.6. Samples: 2230908. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:00:12,164][00307] Avg episode reward: [(0, '32.433')] -[2024-09-01 10:00:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 10457088. Throughput: 0: 211.2. Samples: 2231884. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:00:17,162][00307] Avg episode reward: [(0, '32.996')] -[2024-09-01 10:00:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10465280. Throughput: 0: 222.4. Samples: 2232794. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:00:22,160][00307] Avg episode reward: [(0, '32.866')] -[2024-09-01 10:00:25,426][16881] Updated weights for policy 0, policy_version 2556 (0.0559) -[2024-09-01 10:00:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10469376. Throughput: 0: 226.0. Samples: 2234122. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:00:27,163][00307] Avg episode reward: [(0, '32.476')] -[2024-09-01 10:00:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10473472. Throughput: 0: 223.5. Samples: 2235434. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:00:32,160][00307] Avg episode reward: [(0, '32.374')] -[2024-09-01 10:00:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10477568. Throughput: 0: 217.0. Samples: 2235974. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:00:37,158][00307] Avg episode reward: [(0, '32.214')] -[2024-09-01 10:00:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10481664. Throughput: 0: 220.4. Samples: 2237484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:00:42,157][00307] Avg episode reward: [(0, '32.285')] -[2024-09-01 10:00:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10485760. Throughput: 0: 225.9. Samples: 2238946. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:00:47,159][00307] Avg episode reward: [(0, '32.130')] -[2024-09-01 10:00:52,163][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 888.6). Total num frames: 10489856. Throughput: 0: 217.5. Samples: 2239346. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:00:52,166][00307] Avg episode reward: [(0, '31.900')] -[2024-09-01 10:00:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10493952. Throughput: 0: 212.6. Samples: 2240476. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:00:57,158][00307] Avg episode reward: [(0, '31.551')] -[2024-09-01 10:01:02,154][00307] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10498048. Throughput: 0: 227.1. Samples: 2242102. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:01:02,166][00307] Avg episode reward: [(0, '30.914')] -[2024-09-01 10:01:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10502144. Throughput: 0: 221.6. Samples: 2242766. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:01:07,157][00307] Avg episode reward: [(0, '30.203')] -[2024-09-01 10:01:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 10506240. Throughput: 0: 216.5. Samples: 2243866. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:01:12,159][00307] Avg episode reward: [(0, '29.737')] -[2024-09-01 10:01:13,476][16881] Updated weights for policy 0, policy_version 2566 (0.2087) -[2024-09-01 10:01:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10514432. Throughput: 0: 216.0. Samples: 2245154. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:01:17,157][00307] Avg episode reward: [(0, '30.595')] -[2024-09-01 10:01:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10518528. Throughput: 0: 226.4. Samples: 2246162. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:01:22,163][00307] Avg episode reward: [(0, '31.092')] -[2024-09-01 10:01:27,163][00307] Fps is (10 sec: 818.5, 60 sec: 887.3, 300 sec: 888.6). Total num frames: 10522624. Throughput: 0: 215.4. Samples: 2247178. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:01:27,166][00307] Avg episode reward: [(0, '30.596')] -[2024-09-01 10:01:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10526720. Throughput: 0: 210.2. Samples: 2248406. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:01:32,157][00307] Avg episode reward: [(0, '30.627')] -[2024-09-01 10:01:37,154][00307] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10530816. Throughput: 0: 220.5. Samples: 2249268. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:01:37,156][00307] Avg episode reward: [(0, '31.524')] -[2024-09-01 10:01:42,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 10534912. Throughput: 0: 225.9. Samples: 2250642. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:01:42,160][00307] Avg episode reward: [(0, '30.998')] -[2024-09-01 10:01:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10539008. Throughput: 0: 216.2. Samples: 2251832. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:01:47,157][00307] Avg episode reward: [(0, '31.378')] -[2024-09-01 10:01:52,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 10543104. Throughput: 0: 213.3. Samples: 2252364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:01:52,156][00307] Avg episode reward: [(0, '31.340')] -[2024-09-01 10:01:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10547200. Throughput: 0: 233.9. Samples: 2254392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:01:57,168][00307] Avg episode reward: [(0, '30.999')] -[2024-09-01 10:01:57,405][16881] Updated weights for policy 0, policy_version 2576 (0.1548) -[2024-09-01 10:02:01,189][16868] Signal inference workers to stop experience collection... (2200 times) -[2024-09-01 10:02:01,310][16881] InferenceWorker_p0-w0: stopping experience collection (2200 times) -[2024-09-01 10:02:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10551296. Throughput: 0: 227.6. Samples: 2255396. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:02:02,156][00307] Avg episode reward: [(0, '30.677')] -[2024-09-01 10:02:02,961][16868] Signal inference workers to resume experience collection... (2200 times) -[2024-09-01 10:02:02,962][16881] InferenceWorker_p0-w0: resuming experience collection (2200 times) -[2024-09-01 10:02:02,970][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002577_10555392.pth... -[2024-09-01 10:02:03,169][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002524_10338304.pth -[2024-09-01 10:02:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10555392. Throughput: 0: 216.6. Samples: 2255908. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:02:07,163][00307] Avg episode reward: [(0, '30.644')] -[2024-09-01 10:02:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10563584. Throughput: 0: 229.5. Samples: 2257502. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:02:12,160][00307] Avg episode reward: [(0, '30.606')] -[2024-09-01 10:02:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10567680. Throughput: 0: 232.5. Samples: 2258870. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:02:17,158][00307] Avg episode reward: [(0, '30.186')] -[2024-09-01 10:02:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10571776. Throughput: 0: 227.2. Samples: 2259494. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:02:22,166][00307] Avg episode reward: [(0, '30.811')] -[2024-09-01 10:02:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 10575872. Throughput: 0: 218.5. Samples: 2260472. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:02:27,162][00307] Avg episode reward: [(0, '30.112')] -[2024-09-01 10:02:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10579968. Throughput: 0: 230.2. Samples: 2262192. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:02:32,165][00307] Avg episode reward: [(0, '28.734')] -[2024-09-01 10:02:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10584064. Throughput: 0: 231.1. Samples: 2262764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:02:37,156][00307] Avg episode reward: [(0, '28.975')] -[2024-09-01 10:02:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10588160. Throughput: 0: 209.2. Samples: 2263804. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:02:42,158][00307] Avg episode reward: [(0, '28.727')] -[2024-09-01 10:02:45,106][16881] Updated weights for policy 0, policy_version 2586 (0.0531) -[2024-09-01 10:02:47,156][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10592256. Throughput: 0: 222.4. Samples: 2265406. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:02:47,166][00307] Avg episode reward: [(0, '28.185')] -[2024-09-01 10:02:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10596352. Throughput: 0: 224.4. Samples: 2266006. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:02:52,158][00307] Avg episode reward: [(0, '28.649')] -[2024-09-01 10:02:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10600448. Throughput: 0: 219.9. Samples: 2267396. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:02:57,157][00307] Avg episode reward: [(0, '29.265')] -[2024-09-01 10:03:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10604544. Throughput: 0: 218.8. Samples: 2268718. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:02,163][00307] Avg episode reward: [(0, '29.495')] -[2024-09-01 10:03:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10612736. Throughput: 0: 223.9. Samples: 2269570. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:07,156][00307] Avg episode reward: [(0, '29.417')] -[2024-09-01 10:03:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10616832. Throughput: 0: 227.0. Samples: 2270688. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:12,160][00307] Avg episode reward: [(0, '28.922')] -[2024-09-01 10:03:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10620928. Throughput: 0: 213.2. Samples: 2271786. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:17,157][00307] Avg episode reward: [(0, '29.389')] -[2024-09-01 10:03:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10625024. Throughput: 0: 223.6. Samples: 2272826. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:22,157][00307] Avg episode reward: [(0, '29.625')] -[2024-09-01 10:03:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10629120. Throughput: 0: 233.8. Samples: 2274326. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:27,159][00307] Avg episode reward: [(0, '29.554')] -[2024-09-01 10:03:29,908][16881] Updated weights for policy 0, policy_version 2596 (0.1113) -[2024-09-01 10:03:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10633216. Throughput: 0: 220.6. Samples: 2275334. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:03:32,161][00307] Avg episode reward: [(0, '29.491')] -[2024-09-01 10:03:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10637312. Throughput: 0: 219.4. Samples: 2275878. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:03:37,158][00307] Avg episode reward: [(0, '29.213')] -[2024-09-01 10:03:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10641408. Throughput: 0: 229.6. Samples: 2277726. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:42,165][00307] Avg episode reward: [(0, '29.440')] -[2024-09-01 10:03:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10645504. Throughput: 0: 226.8. Samples: 2278924. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:03:47,157][00307] Avg episode reward: [(0, '29.427')] -[2024-09-01 10:03:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10649600. Throughput: 0: 216.6. Samples: 2279318. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:03:52,163][00307] Avg episode reward: [(0, '29.049')] -[2024-09-01 10:03:57,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10657792. Throughput: 0: 224.8. Samples: 2280802. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:03:57,168][00307] Avg episode reward: [(0, '28.920')] -[2024-09-01 10:04:00,892][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002603_10661888.pth... -[2024-09-01 10:04:01,011][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002551_10448896.pth -[2024-09-01 10:04:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10661888. Throughput: 0: 234.8. Samples: 2282354. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:04:02,159][00307] Avg episode reward: [(0, '27.874')] -[2024-09-01 10:04:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10665984. Throughput: 0: 223.5. Samples: 2282882. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:07,165][00307] Avg episode reward: [(0, '27.097')] -[2024-09-01 10:04:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10670080. Throughput: 0: 213.0. Samples: 2283910. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:12,156][00307] Avg episode reward: [(0, '26.695')] -[2024-09-01 10:04:15,260][16881] Updated weights for policy 0, policy_version 2606 (0.2071) -[2024-09-01 10:04:17,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 10674176. Throughput: 0: 229.7. Samples: 2285672. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:17,161][00307] Avg episode reward: [(0, '27.033')] -[2024-09-01 10:04:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10678272. Throughput: 0: 227.2. Samples: 2286102. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:22,164][00307] Avg episode reward: [(0, '27.033')] -[2024-09-01 10:04:27,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10682368. Throughput: 0: 212.7. Samples: 2287296. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:27,162][00307] Avg episode reward: [(0, '27.712')] -[2024-09-01 10:04:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10686464. Throughput: 0: 224.1. Samples: 2289008. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:32,166][00307] Avg episode reward: [(0, '27.440')] -[2024-09-01 10:04:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10690560. Throughput: 0: 230.3. Samples: 2289682. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:37,163][00307] Avg episode reward: [(0, '27.087')] -[2024-09-01 10:04:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10694656. Throughput: 0: 227.8. Samples: 2291052. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:42,156][00307] Avg episode reward: [(0, '27.663')] -[2024-09-01 10:04:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10698752. Throughput: 0: 217.8. Samples: 2292154. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:47,164][00307] Avg episode reward: [(0, '27.055')] -[2024-09-01 10:04:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10706944. Throughput: 0: 225.6. Samples: 2293036. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:52,158][00307] Avg episode reward: [(0, '27.482')] -[2024-09-01 10:04:57,159][00307] Fps is (10 sec: 1228.2, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 10711040. Throughput: 0: 230.7. Samples: 2294292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:04:57,165][00307] Avg episode reward: [(0, '27.978')] -[2024-09-01 10:05:01,496][16881] Updated weights for policy 0, policy_version 2616 (0.1043) -[2024-09-01 10:05:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10715136. Throughput: 0: 213.9. Samples: 2295296. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:05:02,163][00307] Avg episode reward: [(0, '27.008')] -[2024-09-01 10:05:07,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10719232. Throughput: 0: 224.6. Samples: 2296208. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:05:07,157][00307] Avg episode reward: [(0, '27.348')] -[2024-09-01 10:05:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10723328. Throughput: 0: 230.8. Samples: 2297680. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:12,164][00307] Avg episode reward: [(0, '26.828')] -[2024-09-01 10:05:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10727424. Throughput: 0: 217.1. Samples: 2298778. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:17,157][00307] Avg episode reward: [(0, '26.892')] -[2024-09-01 10:05:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10731520. Throughput: 0: 212.7. Samples: 2299252. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:05:22,161][00307] Avg episode reward: [(0, '27.563')] -[2024-09-01 10:05:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10735616. Throughput: 0: 221.1. Samples: 2301002. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:05:27,168][00307] Avg episode reward: [(0, '27.822')] -[2024-09-01 10:05:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10739712. Throughput: 0: 226.8. Samples: 2302360. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:32,156][00307] Avg episode reward: [(0, '28.075')] -[2024-09-01 10:05:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10743808. Throughput: 0: 220.8. Samples: 2302972. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:37,160][00307] Avg episode reward: [(0, '27.554')] -[2024-09-01 10:05:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10747904. Throughput: 0: 225.4. Samples: 2304432. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:42,156][00307] Avg episode reward: [(0, '27.126')] -[2024-09-01 10:05:46,139][16881] Updated weights for policy 0, policy_version 2626 (0.1218) -[2024-09-01 10:05:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10756096. Throughput: 0: 234.4. Samples: 2305844. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:47,161][00307] Avg episode reward: [(0, '27.089')] -[2024-09-01 10:05:48,592][16868] Signal inference workers to stop experience collection... (2250 times) -[2024-09-01 10:05:48,693][16881] InferenceWorker_p0-w0: stopping experience collection (2250 times) -[2024-09-01 10:05:50,864][16868] Signal inference workers to resume experience collection... (2250 times) -[2024-09-01 10:05:50,864][16881] InferenceWorker_p0-w0: resuming experience collection (2250 times) -[2024-09-01 10:05:52,160][00307] Fps is (10 sec: 1228.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 10760192. Throughput: 0: 228.5. Samples: 2306494. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:52,172][00307] Avg episode reward: [(0, '27.324')] -[2024-09-01 10:05:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10764288. Throughput: 0: 218.6. Samples: 2307516. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:05:57,157][00307] Avg episode reward: [(0, '26.905')] -[2024-09-01 10:06:00,414][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002629_10768384.pth... -[2024-09-01 10:06:00,507][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002577_10555392.pth -[2024-09-01 10:06:02,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10768384. Throughput: 0: 231.4. Samples: 2309190. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:02,157][00307] Avg episode reward: [(0, '26.914')] -[2024-09-01 10:06:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10772480. Throughput: 0: 230.2. Samples: 2309610. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:06:07,161][00307] Avg episode reward: [(0, '26.870')] -[2024-09-01 10:06:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10776576. Throughput: 0: 220.9. Samples: 2310944. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:06:12,157][00307] Avg episode reward: [(0, '26.795')] -[2024-09-01 10:06:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10780672. Throughput: 0: 223.8. Samples: 2312432. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:06:17,157][00307] Avg episode reward: [(0, '27.673')] -[2024-09-01 10:06:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10784768. Throughput: 0: 223.2. Samples: 2313018. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:06:22,165][00307] Avg episode reward: [(0, '27.974')] -[2024-09-01 10:06:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10788864. Throughput: 0: 220.6. Samples: 2314358. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:27,164][00307] Avg episode reward: [(0, '27.990')] -[2024-09-01 10:06:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10792960. Throughput: 0: 217.9. Samples: 2315648. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:32,161][00307] Avg episode reward: [(0, '28.339')] -[2024-09-01 10:06:33,451][16881] Updated weights for policy 0, policy_version 2636 (0.1559) -[2024-09-01 10:06:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10797056. Throughput: 0: 219.0. Samples: 2316346. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:37,166][00307] Avg episode reward: [(0, '28.818')] -[2024-09-01 10:06:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10805248. Throughput: 0: 226.9. Samples: 2317726. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:42,168][00307] Avg episode reward: [(0, '28.696')] -[2024-09-01 10:06:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10809344. Throughput: 0: 214.7. Samples: 2318850. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:47,161][00307] Avg episode reward: [(0, '28.567')] -[2024-09-01 10:06:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 10813440. Throughput: 0: 222.8. Samples: 2319638. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:52,157][00307] Avg episode reward: [(0, '28.764')] -[2024-09-01 10:06:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10817536. Throughput: 0: 224.9. Samples: 2321066. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:06:57,160][00307] Avg episode reward: [(0, '29.397')] -[2024-09-01 10:07:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10821632. Throughput: 0: 224.7. Samples: 2322542. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:07:02,163][00307] Avg episode reward: [(0, '30.038')] -[2024-09-01 10:07:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10825728. Throughput: 0: 219.1. Samples: 2322878. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:07:07,160][00307] Avg episode reward: [(0, '29.966')] -[2024-09-01 10:07:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10829824. Throughput: 0: 224.8. Samples: 2324476. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:07:12,158][00307] Avg episode reward: [(0, '29.874')] -[2024-09-01 10:07:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10833920. Throughput: 0: 230.0. Samples: 2325996. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:07:17,158][00307] Avg episode reward: [(0, '29.607')] -[2024-09-01 10:07:17,460][16881] Updated weights for policy 0, policy_version 2646 (0.0039) -[2024-09-01 10:07:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10838016. Throughput: 0: 228.0. Samples: 2326604. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:07:22,160][00307] Avg episode reward: [(0, '29.658')] -[2024-09-01 10:07:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10842112. Throughput: 0: 223.2. Samples: 2327770. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:07:27,168][00307] Avg episode reward: [(0, '29.431')] -[2024-09-01 10:07:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10850304. Throughput: 0: 229.6. Samples: 2329184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:07:32,160][00307] Avg episode reward: [(0, '29.293')] -[2024-09-01 10:07:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10854400. Throughput: 0: 232.8. Samples: 2330114. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:07:37,158][00307] Avg episode reward: [(0, '29.466')] -[2024-09-01 10:07:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10858496. Throughput: 0: 223.3. Samples: 2331114. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:07:42,161][00307] Avg episode reward: [(0, '29.666')] -[2024-09-01 10:07:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10862592. Throughput: 0: 219.4. Samples: 2332414. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:07:47,156][00307] Avg episode reward: [(0, '29.457')] -[2024-09-01 10:07:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10866688. Throughput: 0: 230.5. Samples: 2333252. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:07:52,158][00307] Avg episode reward: [(0, '29.447')] -[2024-09-01 10:07:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10870784. Throughput: 0: 226.8. Samples: 2334680. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:07:57,157][00307] Avg episode reward: [(0, '29.990')] -[2024-09-01 10:07:59,989][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002655_10874880.pth... -[2024-09-01 10:08:00,150][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002603_10661888.pth -[2024-09-01 10:08:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10874880. Throughput: 0: 221.2. Samples: 2335950. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:02,158][00307] Avg episode reward: [(0, '29.668')] -[2024-09-01 10:08:04,642][16881] Updated weights for policy 0, policy_version 2656 (0.1545) -[2024-09-01 10:08:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10878976. Throughput: 0: 216.8. Samples: 2336358. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:07,158][00307] Avg episode reward: [(0, '29.575')] -[2024-09-01 10:08:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10883072. Throughput: 0: 233.1. Samples: 2338258. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:12,157][00307] Avg episode reward: [(0, '29.367')] -[2024-09-01 10:08:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10887168. Throughput: 0: 224.3. Samples: 2339276. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:08:17,156][00307] Avg episode reward: [(0, '30.381')] -[2024-09-01 10:08:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10891264. Throughput: 0: 219.5. Samples: 2339990. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:08:22,158][00307] Avg episode reward: [(0, '30.029')] -[2024-09-01 10:08:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10899456. Throughput: 0: 226.9. Samples: 2341326. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:27,160][00307] Avg episode reward: [(0, '29.980')] -[2024-09-01 10:08:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10903552. Throughput: 0: 227.0. Samples: 2342630. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:08:32,163][00307] Avg episode reward: [(0, '29.222')] -[2024-09-01 10:08:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10907648. Throughput: 0: 224.0. Samples: 2343332. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:08:37,160][00307] Avg episode reward: [(0, '29.254')] -[2024-09-01 10:08:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10911744. Throughput: 0: 217.7. Samples: 2344478. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:42,160][00307] Avg episode reward: [(0, '29.885')] -[2024-09-01 10:08:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10915840. Throughput: 0: 231.0. Samples: 2346344. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:47,162][00307] Avg episode reward: [(0, '29.249')] -[2024-09-01 10:08:49,699][16881] Updated weights for policy 0, policy_version 2666 (0.1065) -[2024-09-01 10:08:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10919936. Throughput: 0: 228.0. Samples: 2346620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:52,158][00307] Avg episode reward: [(0, '29.179')] -[2024-09-01 10:08:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10924032. Throughput: 0: 216.2. Samples: 2347988. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:08:57,157][00307] Avg episode reward: [(0, '29.167')] -[2024-09-01 10:09:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10928128. Throughput: 0: 229.6. Samples: 2349608. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:09:02,163][00307] Avg episode reward: [(0, '29.038')] -[2024-09-01 10:09:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10932224. Throughput: 0: 230.5. Samples: 2350362. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:09:07,159][00307] Avg episode reward: [(0, '28.995')] -[2024-09-01 10:09:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10936320. Throughput: 0: 223.8. Samples: 2351396. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:09:12,162][00307] Avg episode reward: [(0, '28.781')] -[2024-09-01 10:09:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10944512. Throughput: 0: 225.5. Samples: 2352776. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:09:17,160][00307] Avg episode reward: [(0, '29.285')] -[2024-09-01 10:09:22,156][00307] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10948608. Throughput: 0: 230.9. Samples: 2353722. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:09:22,159][00307] Avg episode reward: [(0, '28.122')] -[2024-09-01 10:09:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10952704. Throughput: 0: 227.1. Samples: 2354698. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:09:27,159][00307] Avg episode reward: [(0, '28.388')] -[2024-09-01 10:09:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10956800. Throughput: 0: 214.8. Samples: 2356010. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:09:32,157][00307] Avg episode reward: [(0, '27.992')] -[2024-09-01 10:09:35,147][16881] Updated weights for policy 0, policy_version 2676 (0.1036) -[2024-09-01 10:09:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10960896. Throughput: 0: 225.2. Samples: 2356756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:09:37,168][00307] Avg episode reward: [(0, '28.446')] -[2024-09-01 10:09:37,609][16868] Signal inference workers to stop experience collection... (2300 times) -[2024-09-01 10:09:37,672][16881] InferenceWorker_p0-w0: stopping experience collection (2300 times) -[2024-09-01 10:09:39,065][16868] Signal inference workers to resume experience collection... (2300 times) -[2024-09-01 10:09:39,066][16881] InferenceWorker_p0-w0: resuming experience collection (2300 times) -[2024-09-01 10:09:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10964992. Throughput: 0: 229.4. Samples: 2358312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:09:42,157][00307] Avg episode reward: [(0, '28.850')] -[2024-09-01 10:09:47,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 10969088. Throughput: 0: 214.8. Samples: 2359274. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:09:47,167][00307] Avg episode reward: [(0, '28.893')] -[2024-09-01 10:09:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10973184. Throughput: 0: 216.1. Samples: 2360088. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:09:52,162][00307] Avg episode reward: [(0, '30.092')] -[2024-09-01 10:09:57,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10977280. Throughput: 0: 231.2. Samples: 2361802. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:09:57,163][00307] Avg episode reward: [(0, '30.403')] -[2024-09-01 10:10:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10981376. Throughput: 0: 226.4. Samples: 2362962. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:10:02,162][00307] Avg episode reward: [(0, '29.908')] -[2024-09-01 10:10:02,909][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002682_10985472.pth... -[2024-09-01 10:10:03,078][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002629_10768384.pth -[2024-09-01 10:10:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 10985472. Throughput: 0: 216.5. Samples: 2363462. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:07,163][00307] Avg episode reward: [(0, '29.710')] -[2024-09-01 10:10:12,154][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 10993664. Throughput: 0: 229.2. Samples: 2365012. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:10:12,157][00307] Avg episode reward: [(0, '29.494')] -[2024-09-01 10:10:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 10997760. Throughput: 0: 232.1. Samples: 2366456. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) -[2024-09-01 10:10:17,156][00307] Avg episode reward: [(0, '29.495')] -[2024-09-01 10:10:21,129][16881] Updated weights for policy 0, policy_version 2686 (0.1691) -[2024-09-01 10:10:22,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11001856. Throughput: 0: 227.5. Samples: 2366992. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:22,159][00307] Avg episode reward: [(0, '29.513')] -[2024-09-01 10:10:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11005952. Throughput: 0: 218.0. Samples: 2368124. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:27,157][00307] Avg episode reward: [(0, '30.020')] -[2024-09-01 10:10:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11010048. Throughput: 0: 235.6. Samples: 2369876. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:10:32,159][00307] Avg episode reward: [(0, '29.446')] -[2024-09-01 10:10:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11014144. Throughput: 0: 228.3. Samples: 2370362. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:10:37,163][00307] Avg episode reward: [(0, '29.682')] -[2024-09-01 10:10:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11018240. Throughput: 0: 214.7. Samples: 2371464. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:42,166][00307] Avg episode reward: [(0, '29.836')] -[2024-09-01 10:10:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11022336. Throughput: 0: 228.3. Samples: 2373234. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:47,162][00307] Avg episode reward: [(0, '29.946')] -[2024-09-01 10:10:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11026432. Throughput: 0: 237.0. Samples: 2374128. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:52,157][00307] Avg episode reward: [(0, '29.243')] -[2024-09-01 10:10:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11030528. Throughput: 0: 223.4. Samples: 2375064. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:10:57,157][00307] Avg episode reward: [(0, '28.561')] -[2024-09-01 10:11:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11038720. Throughput: 0: 217.5. Samples: 2376242. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:11:02,166][00307] Avg episode reward: [(0, '28.889')] -[2024-09-01 10:11:06,130][16881] Updated weights for policy 0, policy_version 2696 (0.1034) -[2024-09-01 10:11:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11042816. Throughput: 0: 226.2. Samples: 2377172. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:07,156][00307] Avg episode reward: [(0, '28.673')] -[2024-09-01 10:11:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11046912. Throughput: 0: 227.1. Samples: 2378342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:12,156][00307] Avg episode reward: [(0, '28.313')] -[2024-09-01 10:11:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11051008. Throughput: 0: 218.2. Samples: 2379696. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:17,164][00307] Avg episode reward: [(0, '28.343')] -[2024-09-01 10:11:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11055104. Throughput: 0: 222.3. Samples: 2380364. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:22,158][00307] Avg episode reward: [(0, '28.131')] -[2024-09-01 10:11:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11059200. Throughput: 0: 237.7. Samples: 2382162. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:27,158][00307] Avg episode reward: [(0, '28.940')] -[2024-09-01 10:11:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11063296. Throughput: 0: 219.8. Samples: 2383126. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:32,158][00307] Avg episode reward: [(0, '28.830')] -[2024-09-01 10:11:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11067392. Throughput: 0: 214.7. Samples: 2383788. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:11:37,159][00307] Avg episode reward: [(0, '28.210')] -[2024-09-01 10:11:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11075584. Throughput: 0: 229.2. Samples: 2385380. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:11:42,166][00307] Avg episode reward: [(0, '28.797')] -[2024-09-01 10:11:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11079680. Throughput: 0: 225.8. Samples: 2386404. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:11:47,156][00307] Avg episode reward: [(0, '27.917')] -[2024-09-01 10:11:52,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11079680. Throughput: 0: 221.0. Samples: 2387118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:11:52,164][00307] Avg episode reward: [(0, '27.404')] -[2024-09-01 10:11:52,954][16881] Updated weights for policy 0, policy_version 2706 (0.2104) -[2024-09-01 10:11:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11087872. Throughput: 0: 224.6. Samples: 2388450. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:11:57,160][00307] Avg episode reward: [(0, '26.364')] -[2024-09-01 10:12:00,261][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002708_11091968.pth... -[2024-09-01 10:12:00,378][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002655_10874880.pth -[2024-09-01 10:12:02,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11091968. Throughput: 0: 230.9. Samples: 2390088. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:02,159][00307] Avg episode reward: [(0, '26.588')] -[2024-09-01 10:12:07,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11096064. Throughput: 0: 225.3. Samples: 2390502. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:07,162][00307] Avg episode reward: [(0, '26.588')] -[2024-09-01 10:12:12,156][00307] Fps is (10 sec: 819.2, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11100160. Throughput: 0: 210.9. Samples: 2391654. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:12,165][00307] Avg episode reward: [(0, '26.697')] -[2024-09-01 10:12:17,154][00307] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11104256. Throughput: 0: 230.0. Samples: 2393476. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:17,164][00307] Avg episode reward: [(0, '26.453')] -[2024-09-01 10:12:22,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11108352. Throughput: 0: 224.0. Samples: 2393870. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:22,163][00307] Avg episode reward: [(0, '25.973')] -[2024-09-01 10:12:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11112448. Throughput: 0: 214.2. Samples: 2395020. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:27,161][00307] Avg episode reward: [(0, '26.094')] -[2024-09-01 10:12:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11116544. Throughput: 0: 227.0. Samples: 2396620. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:12:32,163][00307] Avg episode reward: [(0, '25.927')] -[2024-09-01 10:12:36,811][16881] Updated weights for policy 0, policy_version 2716 (0.2071) -[2024-09-01 10:12:37,155][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11124736. Throughput: 0: 226.4. Samples: 2397306. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:12:37,160][00307] Avg episode reward: [(0, '25.952')] -[2024-09-01 10:12:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11124736. Throughput: 0: 228.3. Samples: 2398722. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:12:42,157][00307] Avg episode reward: [(0, '26.401')] -[2024-09-01 10:12:47,158][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11132928. Throughput: 0: 213.0. Samples: 2399672. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:47,163][00307] Avg episode reward: [(0, '26.473')] -[2024-09-01 10:12:52,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11137024. Throughput: 0: 226.0. Samples: 2400670. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:12:52,157][00307] Avg episode reward: [(0, '26.567')] -[2024-09-01 10:12:57,158][00307] Fps is (10 sec: 819.3, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11141120. Throughput: 0: 226.9. Samples: 2401864. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:12:57,164][00307] Avg episode reward: [(0, '26.876')] -[2024-09-01 10:13:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11145216. Throughput: 0: 213.1. Samples: 2403066. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:13:02,157][00307] Avg episode reward: [(0, '27.677')] -[2024-09-01 10:13:07,154][00307] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11149312. Throughput: 0: 220.1. Samples: 2403774. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:13:07,156][00307] Avg episode reward: [(0, '27.810')] -[2024-09-01 10:13:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11153408. Throughput: 0: 228.7. Samples: 2405312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:13:12,162][00307] Avg episode reward: [(0, '28.007')] -[2024-09-01 10:13:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11157504. Throughput: 0: 225.5. Samples: 2406768. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:13:17,161][00307] Avg episode reward: [(0, '27.839')] -[2024-09-01 10:13:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11161600. Throughput: 0: 216.6. Samples: 2407052. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:13:22,160][00307] Avg episode reward: [(0, '27.642')] -[2024-09-01 10:13:24,129][16881] Updated weights for policy 0, policy_version 2726 (0.3149) -[2024-09-01 10:13:26,532][16868] Signal inference workers to stop experience collection... (2350 times) -[2024-09-01 10:13:26,623][16881] InferenceWorker_p0-w0: stopping experience collection (2350 times) -[2024-09-01 10:13:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11165696. Throughput: 0: 219.5. Samples: 2408600. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:13:27,166][00307] Avg episode reward: [(0, '27.541')] -[2024-09-01 10:13:27,586][16868] Signal inference workers to resume experience collection... (2350 times) -[2024-09-01 10:13:27,588][16881] InferenceWorker_p0-w0: resuming experience collection (2350 times) -[2024-09-01 10:13:32,156][00307] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11173888. Throughput: 0: 229.3. Samples: 2409988. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:13:32,162][00307] Avg episode reward: [(0, '27.471')] -[2024-09-01 10:13:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11173888. Throughput: 0: 222.1. Samples: 2410666. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:13:37,157][00307] Avg episode reward: [(0, '27.944')] -[2024-09-01 10:13:42,154][00307] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11182080. Throughput: 0: 225.6. Samples: 2412014. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:13:42,165][00307] Avg episode reward: [(0, '27.703')] -[2024-09-01 10:13:47,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11186176. Throughput: 0: 233.3. Samples: 2413564. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:13:47,163][00307] Avg episode reward: [(0, '27.977')] -[2024-09-01 10:13:52,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11190272. Throughput: 0: 228.7. Samples: 2414064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:13:52,164][00307] Avg episode reward: [(0, '27.905')] -[2024-09-01 10:13:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11194368. Throughput: 0: 217.0. Samples: 2415078. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:13:57,156][00307] Avg episode reward: [(0, '27.908')] -[2024-09-01 10:13:59,861][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002734_11198464.pth... -[2024-09-01 10:13:59,974][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002682_10985472.pth -[2024-09-01 10:14:02,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11198464. Throughput: 0: 227.9. Samples: 2417022. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:14:02,164][00307] Avg episode reward: [(0, '27.627')] -[2024-09-01 10:14:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11202560. Throughput: 0: 235.2. Samples: 2417634. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:14:07,160][00307] Avg episode reward: [(0, '27.663')] -[2024-09-01 10:14:08,762][16881] Updated weights for policy 0, policy_version 2736 (0.0549) -[2024-09-01 10:14:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11206656. Throughput: 0: 223.6. Samples: 2418660. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:14:12,162][00307] Avg episode reward: [(0, '27.367')] -[2024-09-01 10:14:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11210752. Throughput: 0: 225.6. Samples: 2420138. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:14:17,161][00307] Avg episode reward: [(0, '27.220')] -[2024-09-01 10:14:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11218944. Throughput: 0: 229.2. Samples: 2420978. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:14:22,167][00307] Avg episode reward: [(0, '26.846')] -[2024-09-01 10:14:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11218944. Throughput: 0: 226.0. Samples: 2422184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:14:27,162][00307] Avg episode reward: [(0, '27.143')] -[2024-09-01 10:14:32,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11223040. Throughput: 0: 215.6. Samples: 2423264. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:14:32,156][00307] Avg episode reward: [(0, '27.143')] -[2024-09-01 10:14:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11231232. Throughput: 0: 227.1. Samples: 2424282. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:14:37,158][00307] Avg episode reward: [(0, '27.411')] -[2024-09-01 10:14:42,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11235328. Throughput: 0: 235.5. Samples: 2425676. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:14:42,157][00307] Avg episode reward: [(0, '27.762')] -[2024-09-01 10:14:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11239424. Throughput: 0: 214.1. Samples: 2426658. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:14:47,160][00307] Avg episode reward: [(0, '28.117')] -[2024-09-01 10:14:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11243520. Throughput: 0: 215.7. Samples: 2427340. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:14:52,156][00307] Avg episode reward: [(0, '27.901')] -[2024-09-01 10:14:54,484][16881] Updated weights for policy 0, policy_version 2746 (0.0545) -[2024-09-01 10:14:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11247616. Throughput: 0: 229.6. Samples: 2428990. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:14:57,166][00307] Avg episode reward: [(0, '27.414')] -[2024-09-01 10:15:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11251712. Throughput: 0: 221.6. Samples: 2430108. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:15:02,158][00307] Avg episode reward: [(0, '27.884')] -[2024-09-01 10:15:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11255808. Throughput: 0: 213.1. Samples: 2430566. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:07,156][00307] Avg episode reward: [(0, '27.566')] -[2024-09-01 10:15:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11259904. Throughput: 0: 222.6. Samples: 2432202. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:12,157][00307] Avg episode reward: [(0, '27.723')] -[2024-09-01 10:15:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11268096. Throughput: 0: 212.9. Samples: 2432844. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:17,159][00307] Avg episode reward: [(0, '27.937')] -[2024-09-01 10:15:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11268096. Throughput: 0: 217.2. Samples: 2434054. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:22,159][00307] Avg episode reward: [(0, '27.495')] -[2024-09-01 10:15:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11272192. Throughput: 0: 217.0. Samples: 2435442. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:27,165][00307] Avg episode reward: [(0, '28.164')] -[2024-09-01 10:15:32,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11280384. Throughput: 0: 222.6. Samples: 2436674. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:32,157][00307] Avg episode reward: [(0, '28.197')] -[2024-09-01 10:15:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11284480. Throughput: 0: 227.3. Samples: 2437570. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:37,157][00307] Avg episode reward: [(0, '27.848')] -[2024-09-01 10:15:41,136][16881] Updated weights for policy 0, policy_version 2756 (0.1605) -[2024-09-01 10:15:42,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11288576. Throughput: 0: 214.2. Samples: 2438628. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:42,163][00307] Avg episode reward: [(0, '27.878')] -[2024-09-01 10:15:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11292672. Throughput: 0: 226.3. Samples: 2440292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:15:47,157][00307] Avg episode reward: [(0, '26.762')] -[2024-09-01 10:15:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11296768. Throughput: 0: 231.4. Samples: 2440978. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:15:52,159][00307] Avg episode reward: [(0, '26.877')] -[2024-09-01 10:15:57,155][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11300864. Throughput: 0: 224.8. Samples: 2442320. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:15:57,163][00307] Avg episode reward: [(0, '26.934')] -[2024-09-01 10:15:59,077][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002760_11304960.pth... -[2024-09-01 10:15:59,234][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002708_11091968.pth -[2024-09-01 10:16:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11304960. Throughput: 0: 239.8. Samples: 2443636. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:02,156][00307] Avg episode reward: [(0, '26.336')] -[2024-09-01 10:16:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11309056. Throughput: 0: 227.9. Samples: 2444308. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:07,167][00307] Avg episode reward: [(0, '26.455')] -[2024-09-01 10:16:12,157][00307] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11317248. Throughput: 0: 228.7. Samples: 2445732. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:12,165][00307] Avg episode reward: [(0, '26.442')] -[2024-09-01 10:16:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11317248. Throughput: 0: 224.4. Samples: 2446770. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:17,157][00307] Avg episode reward: [(0, '26.520')] -[2024-09-01 10:16:22,154][00307] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11325440. Throughput: 0: 225.5. Samples: 2447716. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:22,157][00307] Avg episode reward: [(0, '26.964')] -[2024-09-01 10:16:25,693][16881] Updated weights for policy 0, policy_version 2766 (0.1041) -[2024-09-01 10:16:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11329536. Throughput: 0: 228.7. Samples: 2448920. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:16:27,163][00307] Avg episode reward: [(0, '28.007')] -[2024-09-01 10:16:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11333632. Throughput: 0: 224.1. Samples: 2450378. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:16:32,160][00307] Avg episode reward: [(0, '28.392')] -[2024-09-01 10:16:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11337728. Throughput: 0: 222.6. Samples: 2450996. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:37,157][00307] Avg episode reward: [(0, '27.887')] -[2024-09-01 10:16:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11341824. Throughput: 0: 226.7. Samples: 2452520. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:16:42,156][00307] Avg episode reward: [(0, '26.781')] -[2024-09-01 10:16:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11345920. Throughput: 0: 231.0. Samples: 2454032. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:16:47,157][00307] Avg episode reward: [(0, '26.364')] -[2024-09-01 10:16:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11350016. Throughput: 0: 224.1. Samples: 2454392. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:16:52,158][00307] Avg episode reward: [(0, '25.776')] -[2024-09-01 10:16:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11354112. Throughput: 0: 220.9. Samples: 2455670. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:16:57,164][00307] Avg episode reward: [(0, '26.008')] -[2024-09-01 10:17:02,159][00307] Fps is (10 sec: 1228.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11362304. Throughput: 0: 230.6. Samples: 2457148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:02,168][00307] Avg episode reward: [(0, '25.553')] -[2024-09-01 10:17:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11366400. Throughput: 0: 229.8. Samples: 2458056. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:07,156][00307] Avg episode reward: [(0, '24.899')] -[2024-09-01 10:17:12,154][00307] Fps is (10 sec: 409.8, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11366400. Throughput: 0: 224.9. Samples: 2459040. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:12,164][00307] Avg episode reward: [(0, '25.160')] -[2024-09-01 10:17:12,372][16881] Updated weights for policy 0, policy_version 2776 (0.0535) -[2024-09-01 10:17:14,680][16868] Signal inference workers to stop experience collection... (2400 times) -[2024-09-01 10:17:14,718][16881] InferenceWorker_p0-w0: stopping experience collection (2400 times) -[2024-09-01 10:17:16,140][16868] Signal inference workers to resume experience collection... (2400 times) -[2024-09-01 10:17:16,141][16881] InferenceWorker_p0-w0: resuming experience collection (2400 times) -[2024-09-01 10:17:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11374592. Throughput: 0: 221.6. Samples: 2460348. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:17,157][00307] Avg episode reward: [(0, '24.575')] -[2024-09-01 10:17:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11378688. Throughput: 0: 227.6. Samples: 2461236. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:17:22,157][00307] Avg episode reward: [(0, '24.409')] -[2024-09-01 10:17:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11382784. Throughput: 0: 217.5. Samples: 2462308. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:17:27,165][00307] Avg episode reward: [(0, '24.925')] -[2024-09-01 10:17:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11386880. Throughput: 0: 216.3. Samples: 2463764. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:17:32,157][00307] Avg episode reward: [(0, '25.073')] -[2024-09-01 10:17:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11390976. Throughput: 0: 223.1. Samples: 2464430. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-09-01 10:17:37,156][00307] Avg episode reward: [(0, '24.770')] -[2024-09-01 10:17:42,160][00307] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11395072. Throughput: 0: 230.2. Samples: 2466032. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:42,163][00307] Avg episode reward: [(0, '24.134')] -[2024-09-01 10:17:47,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11399168. Throughput: 0: 219.6. Samples: 2467030. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:47,160][00307] Avg episode reward: [(0, '24.461')] -[2024-09-01 10:17:52,154][00307] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11403264. Throughput: 0: 215.7. Samples: 2467764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:17:52,157][00307] Avg episode reward: [(0, '24.533')] -[2024-09-01 10:17:57,144][16881] Updated weights for policy 0, policy_version 2786 (0.1566) -[2024-09-01 10:17:57,154][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11411456. Throughput: 0: 230.0. Samples: 2469390. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:17:57,157][00307] Avg episode reward: [(0, '24.764')] -[2024-09-01 10:18:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.3, 300 sec: 888.6). Total num frames: 11411456. Throughput: 0: 223.5. Samples: 2470404. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:18:02,156][00307] Avg episode reward: [(0, '25.096')] -[2024-09-01 10:18:02,466][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002787_11415552.pth... -[2024-09-01 10:18:02,625][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002734_11198464.pth -[2024-09-01 10:18:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11415552. Throughput: 0: 216.8. Samples: 2470994. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:18:07,168][00307] Avg episode reward: [(0, '25.094')] -[2024-09-01 10:18:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11423744. Throughput: 0: 224.7. Samples: 2472418. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:18:12,158][00307] Avg episode reward: [(0, '25.101')] -[2024-09-01 10:18:17,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11427840. Throughput: 0: 224.4. Samples: 2473864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:18:17,164][00307] Avg episode reward: [(0, '25.246')] -[2024-09-01 10:18:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11431936. Throughput: 0: 221.0. Samples: 2474374. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:18:22,156][00307] Avg episode reward: [(0, '24.949')] -[2024-09-01 10:18:27,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11436032. Throughput: 0: 211.3. Samples: 2475540. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:27,157][00307] Avg episode reward: [(0, '24.992')] -[2024-09-01 10:18:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11440128. Throughput: 0: 231.4. Samples: 2477442. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:32,164][00307] Avg episode reward: [(0, '25.219')] -[2024-09-01 10:18:37,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11444224. Throughput: 0: 219.9. Samples: 2477662. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:37,161][00307] Avg episode reward: [(0, '25.687')] -[2024-09-01 10:18:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 11448320. Throughput: 0: 210.0. Samples: 2478840. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:42,166][00307] Avg episode reward: [(0, '25.659')] -[2024-09-01 10:18:44,597][16881] Updated weights for policy 0, policy_version 2796 (0.1741) -[2024-09-01 10:18:47,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11452416. Throughput: 0: 224.6. Samples: 2480512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:47,170][00307] Avg episode reward: [(0, '25.392')] -[2024-09-01 10:18:52,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11456512. Throughput: 0: 230.7. Samples: 2481378. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:52,159][00307] Avg episode reward: [(0, '25.493')] -[2024-09-01 10:18:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11460608. Throughput: 0: 222.0. Samples: 2482408. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:18:57,161][00307] Avg episode reward: [(0, '25.941')] -[2024-09-01 10:19:02,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11464704. Throughput: 0: 215.9. Samples: 2483578. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:02,162][00307] Avg episode reward: [(0, '25.704')] -[2024-09-01 10:19:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11472896. Throughput: 0: 227.2. Samples: 2484598. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:07,161][00307] Avg episode reward: [(0, '26.268')] -[2024-09-01 10:19:12,156][00307] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11476992. Throughput: 0: 223.9. Samples: 2485618. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:12,159][00307] Avg episode reward: [(0, '26.209')] -[2024-09-01 10:19:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11481088. Throughput: 0: 205.3. Samples: 2486680. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:17,166][00307] Avg episode reward: [(0, '26.262')] -[2024-09-01 10:19:22,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11485184. Throughput: 0: 222.2. Samples: 2487662. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:22,165][00307] Avg episode reward: [(0, '26.323')] -[2024-09-01 10:19:27,156][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11489280. Throughput: 0: 228.7. Samples: 2489132. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:27,158][00307] Avg episode reward: [(0, '26.891')] -[2024-09-01 10:19:30,450][16881] Updated weights for policy 0, policy_version 2806 (0.1060) -[2024-09-01 10:19:32,162][00307] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11493376. Throughput: 0: 213.2. Samples: 2490106. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:32,167][00307] Avg episode reward: [(0, '26.527')] -[2024-09-01 10:19:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11497472. Throughput: 0: 209.1. Samples: 2490786. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:37,156][00307] Avg episode reward: [(0, '26.393')] -[2024-09-01 10:19:42,154][00307] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11501568. Throughput: 0: 222.4. Samples: 2492416. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:42,161][00307] Avg episode reward: [(0, '27.212')] -[2024-09-01 10:19:47,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11505664. Throughput: 0: 226.3. Samples: 2493764. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:47,160][00307] Avg episode reward: [(0, '27.577')] -[2024-09-01 10:19:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11509760. Throughput: 0: 209.4. Samples: 2494020. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:52,165][00307] Avg episode reward: [(0, '27.549')] -[2024-09-01 10:19:57,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11513856. Throughput: 0: 218.8. Samples: 2495464. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:19:57,162][00307] Avg episode reward: [(0, '27.673')] -[2024-09-01 10:20:02,012][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002813_11522048.pth... -[2024-09-01 10:20:02,157][00307] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11522048. Throughput: 0: 227.2. Samples: 2496906. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:02,164][00307] Avg episode reward: [(0, '27.524')] -[2024-09-01 10:20:02,190][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002760_11304960.pth -[2024-09-01 10:20:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11522048. Throughput: 0: 220.7. Samples: 2497592. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:07,157][00307] Avg episode reward: [(0, '27.136')] -[2024-09-01 10:20:12,154][00307] Fps is (10 sec: 409.7, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11526144. Throughput: 0: 215.7. Samples: 2498840. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:12,161][00307] Avg episode reward: [(0, '26.789')] -[2024-09-01 10:20:16,518][16881] Updated weights for policy 0, policy_version 2816 (0.0546) -[2024-09-01 10:20:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11534336. Throughput: 0: 225.5. Samples: 2500250. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:17,159][00307] Avg episode reward: [(0, '27.303')] -[2024-09-01 10:20:22,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 11538432. Throughput: 0: 229.9. Samples: 2501130. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:22,166][00307] Avg episode reward: [(0, '27.597')] -[2024-09-01 10:20:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11542528. Throughput: 0: 216.5. Samples: 2502158. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:27,167][00307] Avg episode reward: [(0, '27.077')] -[2024-09-01 10:20:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.6, 300 sec: 888.6). Total num frames: 11546624. Throughput: 0: 214.5. Samples: 2503416. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:20:32,157][00307] Avg episode reward: [(0, '27.954')] -[2024-09-01 10:20:37,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11550720. Throughput: 0: 227.1. Samples: 2504242. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:20:37,160][00307] Avg episode reward: [(0, '27.344')] -[2024-09-01 10:20:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11554816. Throughput: 0: 222.2. Samples: 2505464. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:20:42,160][00307] Avg episode reward: [(0, '27.937')] -[2024-09-01 10:20:47,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11558912. Throughput: 0: 220.3. Samples: 2506820. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:20:47,156][00307] Avg episode reward: [(0, '27.779')] -[2024-09-01 10:20:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11563008. Throughput: 0: 219.9. Samples: 2507486. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:20:52,163][00307] Avg episode reward: [(0, '27.779')] -[2024-09-01 10:20:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11567104. Throughput: 0: 229.5. Samples: 2509168. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:20:57,159][00307] Avg episode reward: [(0, '27.564')] -[2024-09-01 10:21:02,156][00307] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11571200. Throughput: 0: 218.9. Samples: 2510102. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:21:02,158][00307] Avg episode reward: [(0, '27.927')] -[2024-09-01 10:21:04,098][16881] Updated weights for policy 0, policy_version 2826 (0.1065) -[2024-09-01 10:21:06,547][16868] Signal inference workers to stop experience collection... (2450 times) -[2024-09-01 10:21:06,606][16881] InferenceWorker_p0-w0: stopping experience collection (2450 times) -[2024-09-01 10:21:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11575296. Throughput: 0: 212.8. Samples: 2510704. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:21:07,162][00307] Avg episode reward: [(0, '28.197')] -[2024-09-01 10:21:08,039][16868] Signal inference workers to resume experience collection... (2450 times) -[2024-09-01 10:21:08,041][16881] InferenceWorker_p0-w0: resuming experience collection (2450 times) -[2024-09-01 10:21:12,154][00307] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11583488. Throughput: 0: 226.2. Samples: 2512336. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:21:12,165][00307] Avg episode reward: [(0, '29.168')] -[2024-09-01 10:21:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11587584. Throughput: 0: 220.1. Samples: 2513322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:21:17,156][00307] Avg episode reward: [(0, '29.361')] -[2024-09-01 10:21:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11587584. Throughput: 0: 216.8. Samples: 2513996. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:21:22,159][00307] Avg episode reward: [(0, '29.703')] -[2024-09-01 10:21:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11595776. Throughput: 0: 219.2. Samples: 2515326. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:21:27,158][00307] Avg episode reward: [(0, '29.826')] -[2024-09-01 10:21:32,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11599872. Throughput: 0: 221.4. Samples: 2516784. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:21:32,162][00307] Avg episode reward: [(0, '30.003')] -[2024-09-01 10:21:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11603968. Throughput: 0: 220.5. Samples: 2517410. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:21:37,158][00307] Avg episode reward: [(0, '29.788')] -[2024-09-01 10:21:42,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11608064. Throughput: 0: 205.6. Samples: 2518422. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:21:42,157][00307] Avg episode reward: [(0, '29.617')] -[2024-09-01 10:21:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11612160. Throughput: 0: 227.9. Samples: 2520356. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:21:47,159][00307] Avg episode reward: [(0, '29.489')] -[2024-09-01 10:21:48,817][16881] Updated weights for policy 0, policy_version 2836 (0.2595) -[2024-09-01 10:21:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11616256. Throughput: 0: 228.7. Samples: 2520994. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:21:52,162][00307] Avg episode reward: [(0, '29.396')] -[2024-09-01 10:21:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.8). Total num frames: 11620352. Throughput: 0: 213.9. Samples: 2521960. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:21:57,157][00307] Avg episode reward: [(0, '29.649')] -[2024-09-01 10:21:59,313][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002838_11624448.pth... -[2024-09-01 10:21:59,437][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002787_11415552.pth -[2024-09-01 10:22:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11624448. Throughput: 0: 228.0. Samples: 2523584. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:22:02,158][00307] Avg episode reward: [(0, '29.435')] -[2024-09-01 10:22:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11628544. Throughput: 0: 226.3. Samples: 2524178. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:22:07,166][00307] Avg episode reward: [(0, '29.411')] -[2024-09-01 10:22:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11632640. Throughput: 0: 226.4. Samples: 2525512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:22:12,156][00307] Avg episode reward: [(0, '29.663')] -[2024-09-01 10:22:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11636736. Throughput: 0: 219.9. Samples: 2526678. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:22:17,164][00307] Avg episode reward: [(0, '29.427')] -[2024-09-01 10:22:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11644928. Throughput: 0: 223.6. Samples: 2527474. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:22:22,161][00307] Avg episode reward: [(0, '29.112')] -[2024-09-01 10:22:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11649024. Throughput: 0: 230.0. Samples: 2528770. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:22:27,157][00307] Avg episode reward: [(0, '30.152')] -[2024-09-01 10:22:32,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11653120. Throughput: 0: 209.9. Samples: 2529802. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:22:32,159][00307] Avg episode reward: [(0, '30.152')] -[2024-09-01 10:22:36,466][16881] Updated weights for policy 0, policy_version 2846 (0.0048) -[2024-09-01 10:22:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11657216. Throughput: 0: 215.1. Samples: 2530674. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:22:37,159][00307] Avg episode reward: [(0, '31.458')] -[2024-09-01 10:22:42,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11661312. Throughput: 0: 219.2. Samples: 2531826. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:22:42,162][00307] Avg episode reward: [(0, '31.809')] -[2024-09-01 10:22:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11665408. Throughput: 0: 218.3. Samples: 2533406. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:22:47,160][00307] Avg episode reward: [(0, '32.245')] -[2024-09-01 10:22:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11669504. Throughput: 0: 214.1. Samples: 2533814. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:22:52,162][00307] Avg episode reward: [(0, '31.691')] -[2024-09-01 10:22:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11673600. Throughput: 0: 216.2. Samples: 2535242. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:22:57,157][00307] Avg episode reward: [(0, '31.656')] -[2024-09-01 10:23:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11677696. Throughput: 0: 226.4. Samples: 2536868. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:23:02,157][00307] Avg episode reward: [(0, '31.928')] -[2024-09-01 10:23:07,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 11681792. Throughput: 0: 219.9. Samples: 2537372. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:23:07,161][00307] Avg episode reward: [(0, '32.346')] -[2024-09-01 10:23:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11685888. Throughput: 0: 219.9. Samples: 2538664. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:23:12,161][00307] Avg episode reward: [(0, '32.161')] -[2024-09-01 10:23:17,154][00307] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11694080. Throughput: 0: 207.7. Samples: 2539150. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:23:17,156][00307] Avg episode reward: [(0, '32.161')] -[2024-09-01 10:23:21,634][16881] Updated weights for policy 0, policy_version 2856 (0.2591) -[2024-09-01 10:23:22,156][00307] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11698176. Throughput: 0: 228.3. Samples: 2540948. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:23:22,164][00307] Avg episode reward: [(0, '32.197')] -[2024-09-01 10:23:27,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11698176. Throughput: 0: 227.5. Samples: 2542064. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:23:27,156][00307] Avg episode reward: [(0, '32.191')] -[2024-09-01 10:23:32,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11706368. Throughput: 0: 221.4. Samples: 2543370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:23:32,156][00307] Avg episode reward: [(0, '32.242')] -[2024-09-01 10:23:37,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11710464. Throughput: 0: 229.5. Samples: 2544142. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:23:37,160][00307] Avg episode reward: [(0, '32.129')] -[2024-09-01 10:23:42,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11714560. Throughput: 0: 223.6. Samples: 2545304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:23:42,161][00307] Avg episode reward: [(0, '31.945')] -[2024-09-01 10:23:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11718656. Throughput: 0: 212.8. Samples: 2546442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:23:47,164][00307] Avg episode reward: [(0, '32.346')] -[2024-09-01 10:23:52,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11722752. Throughput: 0: 220.0. Samples: 2547270. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:23:52,160][00307] Avg episode reward: [(0, '32.428')] -[2024-09-01 10:23:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11726848. Throughput: 0: 231.2. Samples: 2549066. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:23:57,157][00307] Avg episode reward: [(0, '32.290')] -[2024-09-01 10:24:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11730944. Throughput: 0: 241.5. Samples: 2550018. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:02,159][00307] Avg episode reward: [(0, '31.967')] -[2024-09-01 10:24:04,184][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002865_11735040.pth... -[2024-09-01 10:24:04,297][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002813_11522048.pth -[2024-09-01 10:24:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11735040. Throughput: 0: 214.9. Samples: 2550620. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:07,157][00307] Avg episode reward: [(0, '31.777')] -[2024-09-01 10:24:08,089][16881] Updated weights for policy 0, policy_version 2866 (0.1046) -[2024-09-01 10:24:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11743232. Throughput: 0: 227.8. Samples: 2552314. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:24:12,157][00307] Avg episode reward: [(0, '31.685')] -[2024-09-01 10:24:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11743232. Throughput: 0: 222.3. Samples: 2553372. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:24:17,157][00307] Avg episode reward: [(0, '31.180')] -[2024-09-01 10:24:22,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11747328. Throughput: 0: 217.6. Samples: 2553934. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:22,164][00307] Avg episode reward: [(0, '31.586')] -[2024-09-01 10:24:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11755520. Throughput: 0: 224.2. Samples: 2555392. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:27,160][00307] Avg episode reward: [(0, '32.087')] -[2024-09-01 10:24:32,155][00307] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11759616. Throughput: 0: 229.0. Samples: 2556748. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:32,158][00307] Avg episode reward: [(0, '32.017')] -[2024-09-01 10:24:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11763712. Throughput: 0: 225.8. Samples: 2557432. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:37,165][00307] Avg episode reward: [(0, '32.440')] -[2024-09-01 10:24:42,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11767808. Throughput: 0: 208.5. Samples: 2558448. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:42,158][00307] Avg episode reward: [(0, '32.506')] -[2024-09-01 10:24:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11771904. Throughput: 0: 231.0. Samples: 2560414. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:47,160][00307] Avg episode reward: [(0, '32.718')] -[2024-09-01 10:24:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11776000. Throughput: 0: 227.8. Samples: 2560872. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:24:52,162][00307] Avg episode reward: [(0, '32.408')] -[2024-09-01 10:24:54,380][16881] Updated weights for policy 0, policy_version 2876 (0.1104) -[2024-09-01 10:24:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11780096. Throughput: 0: 212.4. Samples: 2561870. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:24:57,158][00307] Avg episode reward: [(0, '31.998')] -[2024-09-01 10:24:57,694][16868] Signal inference workers to stop experience collection... (2500 times) -[2024-09-01 10:24:57,744][16881] InferenceWorker_p0-w0: stopping experience collection (2500 times) -[2024-09-01 10:24:59,017][16868] Signal inference workers to resume experience collection... (2500 times) -[2024-09-01 10:24:59,020][16881] InferenceWorker_p0-w0: resuming experience collection (2500 times) -[2024-09-01 10:25:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11784192. Throughput: 0: 225.6. Samples: 2563524. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:25:02,162][00307] Avg episode reward: [(0, '31.850')] -[2024-09-01 10:25:07,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11792384. Throughput: 0: 231.5. Samples: 2564352. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:25:07,161][00307] Avg episode reward: [(0, '32.396')] -[2024-09-01 10:25:12,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11792384. Throughput: 0: 223.2. Samples: 2565436. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:25:12,162][00307] Avg episode reward: [(0, '32.063')] -[2024-09-01 10:25:17,154][00307] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 11796480. Throughput: 0: 218.4. Samples: 2566578. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:25:17,157][00307] Avg episode reward: [(0, '31.793')] -[2024-09-01 10:25:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11804672. Throughput: 0: 224.5. Samples: 2567534. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:25:22,160][00307] Avg episode reward: [(0, '31.901')] -[2024-09-01 10:25:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11808768. Throughput: 0: 228.8. Samples: 2568742. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:25:27,157][00307] Avg episode reward: [(0, '31.818')] -[2024-09-01 10:25:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11812864. Throughput: 0: 210.8. Samples: 2569900. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:25:32,157][00307] Avg episode reward: [(0, '32.105')] -[2024-09-01 10:25:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11816960. Throughput: 0: 217.2. Samples: 2570644. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:25:37,156][00307] Avg episode reward: [(0, '31.954')] -[2024-09-01 10:25:39,123][16881] Updated weights for policy 0, policy_version 2886 (0.1072) -[2024-09-01 10:25:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11821056. Throughput: 0: 234.9. Samples: 2572440. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 10:25:42,157][00307] Avg episode reward: [(0, '31.143')] -[2024-09-01 10:25:47,157][00307] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11825152. Throughput: 0: 218.5. Samples: 2573358. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 10:25:47,167][00307] Avg episode reward: [(0, '30.833')] -[2024-09-01 10:25:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11829248. Throughput: 0: 213.3. Samples: 2573952. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:25:52,161][00307] Avg episode reward: [(0, '30.599')] -[2024-09-01 10:25:57,154][00307] Fps is (10 sec: 1229.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11837440. Throughput: 0: 227.9. Samples: 2575690. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:25:57,158][00307] Avg episode reward: [(0, '30.949')] -[2024-09-01 10:26:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11837440. Throughput: 0: 226.2. Samples: 2576756. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:26:02,160][00307] Avg episode reward: [(0, '31.291')] -[2024-09-01 10:26:02,177][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002891_11841536.pth... -[2024-09-01 10:26:02,319][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002838_11624448.pth -[2024-09-01 10:26:07,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11841536. Throughput: 0: 219.2. Samples: 2577400. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:26:07,166][00307] Avg episode reward: [(0, '30.928')] -[2024-09-01 10:26:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11849728. Throughput: 0: 224.6. Samples: 2578848. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) -[2024-09-01 10:26:12,158][00307] Avg episode reward: [(0, '30.670')] -[2024-09-01 10:26:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11853824. Throughput: 0: 230.7. Samples: 2580280. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:26:17,157][00307] Avg episode reward: [(0, '30.551')] -[2024-09-01 10:26:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11857920. Throughput: 0: 229.1. Samples: 2580954. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:26:22,162][00307] Avg episode reward: [(0, '30.551')] -[2024-09-01 10:26:27,060][16881] Updated weights for policy 0, policy_version 2896 (0.1519) -[2024-09-01 10:26:27,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11862016. Throughput: 0: 213.2. Samples: 2582032. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:26:27,161][00307] Avg episode reward: [(0, '30.644')] -[2024-09-01 10:26:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11866112. Throughput: 0: 229.3. Samples: 2583676. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:26:32,156][00307] Avg episode reward: [(0, '31.117')] -[2024-09-01 10:26:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11870208. Throughput: 0: 229.6. Samples: 2584284. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:26:37,168][00307] Avg episode reward: [(0, '30.469')] -[2024-09-01 10:26:42,157][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11874304. Throughput: 0: 210.3. Samples: 2585156. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:26:42,160][00307] Avg episode reward: [(0, '30.824')] -[2024-09-01 10:26:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11878400. Throughput: 0: 223.1. Samples: 2586794. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:26:47,158][00307] Avg episode reward: [(0, '30.710')] -[2024-09-01 10:26:52,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11882496. Throughput: 0: 222.5. Samples: 2587414. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:26:52,164][00307] Avg episode reward: [(0, '30.808')] -[2024-09-01 10:26:57,155][00307] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11886592. Throughput: 0: 223.3. Samples: 2588896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:26:57,162][00307] Avg episode reward: [(0, '30.594')] -[2024-09-01 10:27:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11890688. Throughput: 0: 220.2. Samples: 2590190. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:27:02,157][00307] Avg episode reward: [(0, '30.319')] -[2024-09-01 10:27:07,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11894784. Throughput: 0: 221.0. Samples: 2590900. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:27:07,157][00307] Avg episode reward: [(0, '30.284')] -[2024-09-01 10:27:11,746][16881] Updated weights for policy 0, policy_version 2906 (0.1672) -[2024-09-01 10:27:12,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11902976. Throughput: 0: 227.6. Samples: 2592276. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:27:12,161][00307] Avg episode reward: [(0, '29.965')] -[2024-09-01 10:27:17,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11902976. Throughput: 0: 212.8. Samples: 2593254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) -[2024-09-01 10:27:17,159][00307] Avg episode reward: [(0, '29.281')] -[2024-09-01 10:27:22,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11911168. Throughput: 0: 220.8. Samples: 2594222. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:27:22,156][00307] Avg episode reward: [(0, '28.672')] -[2024-09-01 10:27:27,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11915264. Throughput: 0: 231.5. Samples: 2595574. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:27:27,157][00307] Avg episode reward: [(0, '28.872')] -[2024-09-01 10:27:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11919360. Throughput: 0: 220.0. Samples: 2596694. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:27:32,160][00307] Avg episode reward: [(0, '28.919')] -[2024-09-01 10:27:37,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11923456. Throughput: 0: 218.0. Samples: 2597222. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:27:37,156][00307] Avg episode reward: [(0, '28.441')] -[2024-09-01 10:27:42,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11927552. Throughput: 0: 216.8. Samples: 2598654. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:27:42,166][00307] Avg episode reward: [(0, '27.939')] -[2024-09-01 10:27:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11931648. Throughput: 0: 225.6. Samples: 2600344. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:27:47,157][00307] Avg episode reward: [(0, '27.822')] -[2024-09-01 10:27:52,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11935744. Throughput: 0: 215.3. Samples: 2600590. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:27:52,158][00307] Avg episode reward: [(0, '27.822')] -[2024-09-01 10:27:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11939840. Throughput: 0: 220.0. Samples: 2602176. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:27:57,162][00307] Avg episode reward: [(0, '28.750')] -[2024-09-01 10:27:58,371][16881] Updated weights for policy 0, policy_version 2916 (0.1051) -[2024-09-01 10:28:01,850][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002917_11948032.pth... -[2024-09-01 10:28:01,969][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002865_11735040.pth -[2024-09-01 10:28:02,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 11948032. Throughput: 0: 227.6. Samples: 2603496. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:02,157][00307] Avg episode reward: [(0, '29.724')] -[2024-09-01 10:28:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11948032. Throughput: 0: 223.2. Samples: 2604264. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:07,157][00307] Avg episode reward: [(0, '29.682')] -[2024-09-01 10:28:12,154][00307] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 11952128. Throughput: 0: 216.4. Samples: 2605314. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:12,164][00307] Avg episode reward: [(0, '29.397')] -[2024-09-01 10:28:17,154][00307] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 11960320. Throughput: 0: 218.3. Samples: 2606518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:17,165][00307] Avg episode reward: [(0, '29.500')] -[2024-09-01 10:28:22,154][00307] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 11964416. Throughput: 0: 228.2. Samples: 2607490. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:22,157][00307] Avg episode reward: [(0, '29.190')] -[2024-09-01 10:28:27,155][00307] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11968512. Throughput: 0: 219.1. Samples: 2608512. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:27,163][00307] Avg episode reward: [(0, '29.190')] -[2024-09-01 10:28:32,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11972608. Throughput: 0: 211.7. Samples: 2609870. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:32,157][00307] Avg episode reward: [(0, '29.294')] -[2024-09-01 10:28:37,154][00307] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11976704. Throughput: 0: 224.7. Samples: 2610702. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:28:37,157][00307] Avg episode reward: [(0, '29.466')] -[2024-09-01 10:28:42,156][00307] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 11980800. Throughput: 0: 220.5. Samples: 2612100. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) -[2024-09-01 10:28:42,163][00307] Avg episode reward: [(0, '29.762')] -[2024-09-01 10:28:45,385][16881] Updated weights for policy 0, policy_version 2926 (0.1072) -[2024-09-01 10:28:47,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11984896. Throughput: 0: 218.9. Samples: 2613346. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:47,157][00307] Avg episode reward: [(0, '29.731')] -[2024-09-01 10:28:47,793][16868] Signal inference workers to stop experience collection... (2550 times) -[2024-09-01 10:28:47,858][16881] InferenceWorker_p0-w0: stopping experience collection (2550 times) -[2024-09-01 10:28:49,312][16868] Signal inference workers to resume experience collection... (2550 times) -[2024-09-01 10:28:49,314][16881] InferenceWorker_p0-w0: resuming experience collection (2550 times) -[2024-09-01 10:28:52,154][00307] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11988992. Throughput: 0: 212.2. Samples: 2613814. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) -[2024-09-01 10:28:52,156][00307] Avg episode reward: [(0, '30.425')] -[2024-09-01 10:28:57,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 11993088. Throughput: 0: 230.7. Samples: 2615694. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:28:57,157][00307] Avg episode reward: [(0, '30.425')] -[2024-09-01 10:29:02,154][00307] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 11997184. Throughput: 0: 226.5. Samples: 2616710. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:29:02,160][00307] Avg episode reward: [(0, '30.218')] -[2024-09-01 10:29:07,154][00307] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 12001280. Throughput: 0: 218.4. Samples: 2617320. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) -[2024-09-01 10:29:07,166][00307] Avg episode reward: [(0, '30.073')] -[2024-09-01 10:29:07,641][16868] Stopping Batcher_0... -[2024-09-01 10:29:07,642][16868] Loop batcher_evt_loop terminating... -[2024-09-01 10:29:07,645][00307] Component Batcher_0 stopped! -[2024-09-01 10:29:07,881][16881] Weights refcount: 2 0 -[2024-09-01 10:29:07,892][00307] Component InferenceWorker_p0-w0 stopped! -[2024-09-01 10:29:07,900][16881] Stopping InferenceWorker_p0-w0... -[2024-09-01 10:29:07,900][16881] Loop inference_proc0-0_evt_loop terminating... -[2024-09-01 10:29:08,160][00307] Component RolloutWorker_w7 stopped! -[2024-09-01 10:29:08,168][16889] Stopping RolloutWorker_w7... -[2024-09-01 10:29:08,174][00307] Component RolloutWorker_w5 stopped! -[2024-09-01 10:29:08,197][00307] Component RolloutWorker_w3 stopped! -[2024-09-01 10:29:08,200][16885] Stopping RolloutWorker_w3... -[2024-09-01 10:29:08,207][16885] Loop rollout_proc3_evt_loop terminating... -[2024-09-01 10:29:08,180][16888] Stopping RolloutWorker_w5... -[2024-09-01 10:29:08,169][16889] Loop rollout_proc7_evt_loop terminating... -[2024-09-01 10:29:08,233][16888] Loop rollout_proc5_evt_loop terminating... -[2024-09-01 10:29:08,264][00307] Component RolloutWorker_w1 stopped! -[2024-09-01 10:29:08,267][16883] Stopping RolloutWorker_w1... -[2024-09-01 10:29:08,296][16883] Loop rollout_proc1_evt_loop terminating... -[2024-09-01 10:29:08,325][16882] Stopping RolloutWorker_w0... -[2024-09-01 10:29:08,325][00307] Component RolloutWorker_w0 stopped! -[2024-09-01 10:29:08,359][16882] Loop rollout_proc0_evt_loop terminating... -[2024-09-01 10:29:08,391][16887] Stopping RolloutWorker_w6... -[2024-09-01 10:29:08,392][16887] Loop rollout_proc6_evt_loop terminating... -[2024-09-01 10:29:08,391][00307] Component RolloutWorker_w6 stopped! -[2024-09-01 10:29:08,425][00307] Component RolloutWorker_w2 stopped! -[2024-09-01 10:29:08,432][16884] Stopping RolloutWorker_w2... -[2024-09-01 10:29:08,460][16884] Loop rollout_proc2_evt_loop terminating... -[2024-09-01 10:29:08,490][00307] Component RolloutWorker_w4 stopped! -[2024-09-01 10:29:08,500][16886] Stopping RolloutWorker_w4... -[2024-09-01 10:29:08,540][16886] Loop rollout_proc4_evt_loop terminating... -[2024-09-01 10:29:12,813][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002932_12009472.pth... -[2024-09-01 10:29:12,894][16868] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002891_11841536.pth -[2024-09-01 10:29:12,908][16868] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002932_12009472.pth... -[2024-09-01 10:29:13,005][16868] Stopping LearnerWorker_p0... -[2024-09-01 10:29:13,007][16868] Loop learner_proc0_evt_loop terminating... -[2024-09-01 10:29:13,006][00307] Component LearnerWorker_p0 stopped! -[2024-09-01 10:29:13,016][00307] Waiting for process learner_proc0 to stop... -[2024-09-01 10:29:13,682][00307] Waiting for process inference_proc0-0 to join... -[2024-09-01 10:29:13,688][00307] Waiting for process rollout_proc0 to join... -[2024-09-01 10:29:13,695][00307] Waiting for process rollout_proc1 to join... -[2024-09-01 10:29:13,702][00307] Waiting for process rollout_proc2 to join... -[2024-09-01 10:29:13,708][00307] Waiting for process rollout_proc3 to join... -[2024-09-01 10:29:13,713][00307] Waiting for process rollout_proc4 to join... -[2024-09-01 10:29:13,721][00307] Waiting for process rollout_proc5 to join... -[2024-09-01 10:29:13,728][00307] Waiting for process rollout_proc6 to join... -[2024-09-01 10:29:13,731][00307] Waiting for process rollout_proc7 to join... -[2024-09-01 10:29:13,740][00307] Batcher 0 profile tree view: -batching: 52.5478, releasing_batches: 0.6417 -[2024-09-01 10:29:13,747][00307] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0052 - wait_policy_total: 123.4046 -update_model: 360.1671 - weight_update: 0.1600 -one_step: 0.0901 - handle_policy_step: 7506.6667 - deserialize: 243.9757, stack: 37.3286, obs_to_device_normalize: 1259.1016, forward: 5500.4170, send_messages: 178.1743 - prepare_outputs: 88.4031 - to_cpu: 9.3155 -[2024-09-01 10:29:13,749][00307] Learner 0 profile tree view: -misc: 0.0207, prepare_batch: 3426.0010 -train: 8303.8001 - epoch_init: 0.0312, minibatch_init: 0.0624, losses_postprocess: 0.4286, kl_divergence: 1.4189, after_optimizer: 6.5746 - calculate_losses: 4058.1022 - losses_init: 0.0358, forward_head: 3646.4146, bptt_initial: 9.9638, tail: 8.5294, advantages_returns: 0.6803, losses: 3.9234 - bptt: 387.2120 - bptt_forward_core: 384.9178 - update: 4235.3517 - clip: 9.7175 -[2024-09-01 10:29:13,753][00307] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 1.7284, enqueue_policy_requests: 143.8024, env_step: 4299.2702, overhead: 105.5068, complete_rollouts: 46.3876 -save_policy_outputs: 115.1593 - split_output_tensors: 37.1704 -[2024-09-01 10:29:13,757][00307] RolloutWorker_w7 profile tree view: -wait_for_trajectories: 2.1392, enqueue_policy_requests: 143.5676, env_step: 4322.2978, overhead: 101.9370, complete_rollouts: 40.6107 -save_policy_outputs: 115.4626 - split_output_tensors: 39.2462 -[2024-09-01 10:29:13,759][00307] Loop Runner_EvtLoop terminating... -[2024-09-01 10:29:13,761][00307] Runner profile tree view: -main_loop: 11833.6824 -[2024-09-01 10:29:13,763][00307] Collected {0: 12009472}, FPS: 884.7 -[2024-09-01 10:29:13,899][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 10:29:13,901][00307] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-01 10:29:13,904][00307] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-01 10:29:13,908][00307] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-01 10:29:13,910][00307] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 10:29:13,912][00307] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-01 10:29:13,914][00307] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 10:29:13,915][00307] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-01 10:29:13,916][00307] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-09-01 10:29:13,918][00307] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-09-01 10:29:13,919][00307] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-01 10:29:13,920][00307] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-01 10:29:13,922][00307] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-01 10:29:13,923][00307] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-01 10:29:13,924][00307] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-01 10:29:14,051][00307] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 10:29:14,058][00307] RunningMeanStd input shape: (1,) -[2024-09-01 10:29:14,114][00307] ConvEncoder: input_channels=3 -[2024-09-01 10:29:14,247][00307] Conv encoder output size: 512 -[2024-09-01 10:29:14,250][00307] Policy head output size: 512 -[2024-09-01 10:29:14,307][00307] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002932_12009472.pth... -[2024-09-01 10:29:15,520][00307] Num frames 100... -[2024-09-01 10:29:15,979][00307] Num frames 200... -[2024-09-01 10:29:16,275][00307] Num frames 300... -[2024-09-01 10:29:16,532][00307] Num frames 400... -[2024-09-01 10:29:16,759][00307] Num frames 500... -[2024-09-01 10:29:16,990][00307] Num frames 600... -[2024-09-01 10:29:17,211][00307] Num frames 700... -[2024-09-01 10:29:17,461][00307] Num frames 800... -[2024-09-01 10:29:17,698][00307] Num frames 900... -[2024-09-01 10:29:17,934][00307] Num frames 1000... -[2024-09-01 10:29:18,187][00307] Num frames 1100... -[2024-09-01 10:29:18,444][00307] Num frames 1200... -[2024-09-01 10:29:18,674][00307] Num frames 1300... -[2024-09-01 10:29:18,911][00307] Num frames 1400... -[2024-09-01 10:29:19,149][00307] Num frames 1500... -[2024-09-01 10:29:19,288][00307] Avg episode rewards: #0: 37.360, true rewards: #0: 15.360 -[2024-09-01 10:29:19,292][00307] Avg episode reward: 37.360, avg true_objective: 15.360 -[2024-09-01 10:29:19,460][00307] Num frames 1600... -[2024-09-01 10:29:19,698][00307] Num frames 1700... -[2024-09-01 10:29:19,924][00307] Num frames 1800... -[2024-09-01 10:29:20,147][00307] Num frames 1900... -[2024-09-01 10:29:20,365][00307] Num frames 2000... -[2024-09-01 10:29:20,609][00307] Num frames 2100... -[2024-09-01 10:29:20,841][00307] Num frames 2200... -[2024-09-01 10:29:21,061][00307] Num frames 2300... -[2024-09-01 10:29:21,253][00307] Num frames 2400... -[2024-09-01 10:29:21,473][00307] Num frames 2500... -[2024-09-01 10:29:21,702][00307] Num frames 2600... -[2024-09-01 10:29:21,936][00307] Num frames 2700... -[2024-09-01 10:29:22,175][00307] Num frames 2800... -[2024-09-01 10:29:22,405][00307] Num frames 2900... -[2024-09-01 10:29:22,654][00307] Num frames 3000... -[2024-09-01 10:29:22,818][00307] Avg episode rewards: #0: 37.235, true rewards: #0: 15.235 -[2024-09-01 10:29:22,820][00307] Avg episode reward: 37.235, avg true_objective: 15.235 -[2024-09-01 10:29:22,944][00307] Num frames 3100... -[2024-09-01 10:29:23,177][00307] Num frames 3200... -[2024-09-01 10:29:23,418][00307] Num frames 3300... -[2024-09-01 10:29:23,672][00307] Num frames 3400... -[2024-09-01 10:29:23,889][00307] Num frames 3500... -[2024-09-01 10:29:24,102][00307] Num frames 3600... -[2024-09-01 10:29:24,314][00307] Num frames 3700... -[2024-09-01 10:29:24,554][00307] Num frames 3800... -[2024-09-01 10:29:24,780][00307] Num frames 3900... -[2024-09-01 10:29:25,018][00307] Avg episode rewards: #0: 33.267, true rewards: #0: 13.267 -[2024-09-01 10:29:25,020][00307] Avg episode reward: 33.267, avg true_objective: 13.267 -[2024-09-01 10:29:25,071][00307] Num frames 4000... -[2024-09-01 10:29:25,309][00307] Num frames 4100... -[2024-09-01 10:29:25,554][00307] Num frames 4200... -[2024-09-01 10:29:25,797][00307] Num frames 4300... -[2024-09-01 10:29:26,017][00307] Num frames 4400... -[2024-09-01 10:29:26,245][00307] Num frames 4500... -[2024-09-01 10:29:26,539][00307] Num frames 4600... -[2024-09-01 10:29:26,871][00307] Num frames 4700... -[2024-09-01 10:29:27,195][00307] Num frames 4800... -[2024-09-01 10:29:27,504][00307] Num frames 4900... -[2024-09-01 10:29:27,823][00307] Num frames 5000... -[2024-09-01 10:29:28,151][00307] Num frames 5100... -[2024-09-01 10:29:28,486][00307] Num frames 5200... -[2024-09-01 10:29:28,820][00307] Num frames 5300... -[2024-09-01 10:29:29,150][00307] Num frames 5400... -[2024-09-01 10:29:29,411][00307] Num frames 5500... -[2024-09-01 10:29:29,645][00307] Num frames 5600... -[2024-09-01 10:29:29,878][00307] Num frames 5700... -[2024-09-01 10:29:30,118][00307] Num frames 5800... -[2024-09-01 10:29:30,351][00307] Num frames 5900... -[2024-09-01 10:29:30,577][00307] Num frames 6000... -[2024-09-01 10:29:30,801][00307] Avg episode rewards: #0: 40.200, true rewards: #0: 15.200 -[2024-09-01 10:29:30,802][00307] Avg episode reward: 40.200, avg true_objective: 15.200 -[2024-09-01 10:29:30,858][00307] Num frames 6100... -[2024-09-01 10:29:31,052][00307] Num frames 6200... -[2024-09-01 10:29:31,240][00307] Num frames 6300... -[2024-09-01 10:29:31,447][00307] Num frames 6400... -[2024-09-01 10:29:31,644][00307] Num frames 6500... -[2024-09-01 10:29:31,842][00307] Num frames 6600... -[2024-09-01 10:29:32,054][00307] Num frames 6700... -[2024-09-01 10:29:32,247][00307] Num frames 6800... -[2024-09-01 10:29:32,448][00307] Num frames 6900... -[2024-09-01 10:29:32,651][00307] Num frames 7000... -[2024-09-01 10:29:32,850][00307] Num frames 7100... -[2024-09-01 10:29:33,065][00307] Num frames 7200... -[2024-09-01 10:29:33,265][00307] Num frames 7300... -[2024-09-01 10:29:33,446][00307] Avg episode rewards: #0: 38.120, true rewards: #0: 14.720 -[2024-09-01 10:29:33,448][00307] Avg episode reward: 38.120, avg true_objective: 14.720 -[2024-09-01 10:29:33,528][00307] Num frames 7400... -[2024-09-01 10:29:33,727][00307] Num frames 7500... -[2024-09-01 10:29:33,936][00307] Num frames 7600... -[2024-09-01 10:29:34,129][00307] Num frames 7700... -[2024-09-01 10:29:34,322][00307] Num frames 7800... -[2024-09-01 10:29:34,535][00307] Num frames 7900... -[2024-09-01 10:29:34,734][00307] Num frames 8000... -[2024-09-01 10:29:34,925][00307] Num frames 8100... -[2024-09-01 10:29:35,140][00307] Num frames 8200... -[2024-09-01 10:29:35,333][00307] Num frames 8300... -[2024-09-01 10:29:35,545][00307] Num frames 8400... -[2024-09-01 10:29:35,744][00307] Num frames 8500... -[2024-09-01 10:29:35,938][00307] Num frames 8600... -[2024-09-01 10:29:36,149][00307] Num frames 8700... -[2024-09-01 10:29:36,345][00307] Num frames 8800... -[2024-09-01 10:29:36,557][00307] Num frames 8900... -[2024-09-01 10:29:36,760][00307] Num frames 9000... -[2024-09-01 10:29:36,995][00307] Num frames 9100... -[2024-09-01 10:29:37,209][00307] Num frames 9200... -[2024-09-01 10:29:37,417][00307] Num frames 9300... -[2024-09-01 10:29:37,561][00307] Avg episode rewards: #0: 40.240, true rewards: #0: 15.573 -[2024-09-01 10:29:37,563][00307] Avg episode reward: 40.240, avg true_objective: 15.573 -[2024-09-01 10:29:37,674][00307] Num frames 9400... -[2024-09-01 10:29:37,869][00307] Num frames 9500... -[2024-09-01 10:29:38,075][00307] Num frames 9600... -[2024-09-01 10:29:38,276][00307] Num frames 9700... -[2024-09-01 10:29:38,481][00307] Num frames 9800... -[2024-09-01 10:29:38,681][00307] Num frames 9900... -[2024-09-01 10:29:38,874][00307] Num frames 10000... -[2024-09-01 10:29:39,096][00307] Avg episode rewards: #0: 36.685, true rewards: #0: 14.400 -[2024-09-01 10:29:39,099][00307] Avg episode reward: 36.685, avg true_objective: 14.400 -[2024-09-01 10:29:39,145][00307] Num frames 10100... -[2024-09-01 10:29:39,386][00307] Num frames 10200... -[2024-09-01 10:29:39,675][00307] Num frames 10300... -[2024-09-01 10:29:39,957][00307] Num frames 10400... -[2024-09-01 10:29:40,233][00307] Num frames 10500... -[2024-09-01 10:29:40,370][00307] Avg episode rewards: #0: 33.535, true rewards: #0: 13.160 -[2024-09-01 10:29:40,373][00307] Avg episode reward: 33.535, avg true_objective: 13.160 -[2024-09-01 10:29:40,579][00307] Num frames 10600... -[2024-09-01 10:29:40,849][00307] Num frames 10700... -[2024-09-01 10:29:41,127][00307] Num frames 10800... -[2024-09-01 10:29:41,425][00307] Num frames 10900... -[2024-09-01 10:29:41,709][00307] Num frames 11000... -[2024-09-01 10:29:41,992][00307] Num frames 11100... -[2024-09-01 10:29:42,262][00307] Num frames 11200... -[2024-09-01 10:29:42,474][00307] Num frames 11300... -[2024-09-01 10:29:42,683][00307] Num frames 11400... -[2024-09-01 10:29:42,883][00307] Num frames 11500... -[2024-09-01 10:29:43,103][00307] Num frames 11600... -[2024-09-01 10:29:43,317][00307] Num frames 11700... -[2024-09-01 10:29:43,524][00307] Num frames 11800... -[2024-09-01 10:29:43,729][00307] Num frames 11900... -[2024-09-01 10:29:43,944][00307] Num frames 12000... -[2024-09-01 10:29:44,147][00307] Num frames 12100... -[2024-09-01 10:29:44,367][00307] Num frames 12200... -[2024-09-01 10:29:44,606][00307] Avg episode rewards: #0: 34.202, true rewards: #0: 13.647 -[2024-09-01 10:29:44,609][00307] Avg episode reward: 34.202, avg true_objective: 13.647 -[2024-09-01 10:29:44,648][00307] Num frames 12300... -[2024-09-01 10:29:44,841][00307] Num frames 12400... -[2024-09-01 10:29:45,048][00307] Num frames 12500... -[2024-09-01 10:29:45,240][00307] Num frames 12600... -[2024-09-01 10:29:45,455][00307] Num frames 12700... -[2024-09-01 10:29:45,653][00307] Num frames 12800... -[2024-09-01 10:29:45,764][00307] Avg episode rewards: #0: 31.726, true rewards: #0: 12.826 -[2024-09-01 10:29:45,768][00307] Avg episode reward: 31.726, avg true_objective: 12.826 -[2024-09-01 10:31:14,821][00307] Replay video saved to /content/train_dir/default_experiment/replay.mp4! -[2024-09-01 10:31:14,891][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 10:31:14,897][00307] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-01 10:31:14,900][00307] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-01 10:31:14,906][00307] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-01 10:31:14,908][00307] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 10:31:14,910][00307] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-01 10:31:14,915][00307] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! -[2024-09-01 10:31:14,917][00307] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-01 10:31:14,918][00307] Adding new argument 'push_to_hub'=True that is not in the saved config file! -[2024-09-01 10:31:14,920][00307] Adding new argument 'hf_repository'='jarski/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! -[2024-09-01 10:31:14,924][00307] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-01 10:31:14,925][00307] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-01 10:31:14,927][00307] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-01 10:31:14,928][00307] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-01 10:31:14,929][00307] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-01 10:31:14,942][00307] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 10:31:14,950][00307] RunningMeanStd input shape: (1,) -[2024-09-01 10:31:14,985][00307] ConvEncoder: input_channels=3 -[2024-09-01 10:31:15,090][00307] Conv encoder output size: 512 -[2024-09-01 10:31:15,095][00307] Policy head output size: 512 -[2024-09-01 10:31:15,129][00307] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002932_12009472.pth... -[2024-09-01 10:31:15,999][00307] Num frames 100... -[2024-09-01 10:31:16,208][00307] Num frames 200... -[2024-09-01 10:31:16,396][00307] Num frames 300... -[2024-09-01 10:31:16,586][00307] Num frames 400... -[2024-09-01 10:31:16,769][00307] Num frames 500... -[2024-09-01 10:31:16,952][00307] Num frames 600... -[2024-09-01 10:31:17,151][00307] Num frames 700... -[2024-09-01 10:31:17,265][00307] Avg episode rewards: #0: 15.280, true rewards: #0: 7.280 -[2024-09-01 10:31:17,268][00307] Avg episode reward: 15.280, avg true_objective: 7.280 -[2024-09-01 10:31:17,410][00307] Num frames 800... -[2024-09-01 10:31:17,608][00307] Num frames 900... -[2024-09-01 10:31:17,810][00307] Num frames 1000... -[2024-09-01 10:31:18,007][00307] Num frames 1100... -[2024-09-01 10:31:18,213][00307] Num frames 1200... -[2024-09-01 10:31:18,421][00307] Num frames 1300... -[2024-09-01 10:31:18,627][00307] Num frames 1400... -[2024-09-01 10:31:18,829][00307] Num frames 1500... -[2024-09-01 10:31:19,038][00307] Num frames 1600... -[2024-09-01 10:31:19,248][00307] Num frames 1700... -[2024-09-01 10:31:19,364][00307] Avg episode rewards: #0: 20.130, true rewards: #0: 8.630 -[2024-09-01 10:31:19,368][00307] Avg episode reward: 20.130, avg true_objective: 8.630 -[2024-09-01 10:31:19,519][00307] Num frames 1800... -[2024-09-01 10:31:19,720][00307] Num frames 1900... -[2024-09-01 10:31:19,904][00307] Num frames 2000... -[2024-09-01 10:31:20,092][00307] Num frames 2100... -[2024-09-01 10:31:20,295][00307] Num frames 2200... -[2024-09-01 10:31:20,493][00307] Num frames 2300... -[2024-09-01 10:31:20,695][00307] Num frames 2400... -[2024-09-01 10:31:20,884][00307] Num frames 2500... -[2024-09-01 10:31:21,085][00307] Num frames 2600... -[2024-09-01 10:31:21,291][00307] Num frames 2700... -[2024-09-01 10:31:21,494][00307] Num frames 2800... -[2024-09-01 10:31:21,691][00307] Num frames 2900... -[2024-09-01 10:31:21,887][00307] Num frames 3000... -[2024-09-01 10:31:22,079][00307] Avg episode rewards: #0: 24.900, true rewards: #0: 10.233 -[2024-09-01 10:31:22,082][00307] Avg episode reward: 24.900, avg true_objective: 10.233 -[2024-09-01 10:31:22,142][00307] Num frames 3100... -[2024-09-01 10:31:22,341][00307] Num frames 3200... -[2024-09-01 10:31:22,537][00307] Num frames 3300... -[2024-09-01 10:31:22,729][00307] Num frames 3400... -[2024-09-01 10:31:22,916][00307] Num frames 3500... -[2024-09-01 10:31:23,110][00307] Num frames 3600... -[2024-09-01 10:31:23,303][00307] Num frames 3700... -[2024-09-01 10:31:23,500][00307] Num frames 3800... -[2024-09-01 10:31:23,691][00307] Num frames 3900... -[2024-09-01 10:31:23,881][00307] Num frames 4000... -[2024-09-01 10:31:24,073][00307] Num frames 4100... -[2024-09-01 10:31:24,258][00307] Num frames 4200... -[2024-09-01 10:31:24,501][00307] Avg episode rewards: #0: 24.975, true rewards: #0: 10.725 -[2024-09-01 10:31:24,503][00307] Avg episode reward: 24.975, avg true_objective: 10.725 -[2024-09-01 10:31:24,526][00307] Num frames 4300... -[2024-09-01 10:31:24,722][00307] Num frames 4400... -[2024-09-01 10:31:24,911][00307] Num frames 4500... -[2024-09-01 10:31:25,107][00307] Num frames 4600... -[2024-09-01 10:31:25,305][00307] Num frames 4700... -[2024-09-01 10:31:25,520][00307] Num frames 4800... -[2024-09-01 10:31:25,723][00307] Num frames 4900... -[2024-09-01 10:31:25,913][00307] Num frames 5000... -[2024-09-01 10:31:26,207][00307] Num frames 5100... -[2024-09-01 10:31:26,498][00307] Num frames 5200... -[2024-09-01 10:31:26,755][00307] Num frames 5300... -[2024-09-01 10:31:27,018][00307] Num frames 5400... -[2024-09-01 10:31:27,280][00307] Num frames 5500... -[2024-09-01 10:31:27,561][00307] Num frames 5600... -[2024-09-01 10:31:27,832][00307] Num frames 5700... -[2024-09-01 10:31:28,105][00307] Num frames 5800... -[2024-09-01 10:31:28,365][00307] Num frames 5900... -[2024-09-01 10:31:28,644][00307] Num frames 6000... -[2024-09-01 10:31:28,916][00307] Num frames 6100... -[2024-09-01 10:31:29,002][00307] Avg episode rewards: #0: 29.428, true rewards: #0: 12.228 -[2024-09-01 10:31:29,004][00307] Avg episode reward: 29.428, avg true_objective: 12.228 -[2024-09-01 10:31:29,165][00307] Num frames 6200... -[2024-09-01 10:31:29,349][00307] Num frames 6300... -[2024-09-01 10:31:29,551][00307] Num frames 6400... -[2024-09-01 10:31:29,741][00307] Num frames 6500... -[2024-09-01 10:31:29,924][00307] Num frames 6600... -[2024-09-01 10:31:30,117][00307] Num frames 6700... -[2024-09-01 10:31:30,305][00307] Num frames 6800... -[2024-09-01 10:31:30,510][00307] Num frames 6900... -[2024-09-01 10:31:30,720][00307] Num frames 7000... -[2024-09-01 10:31:30,914][00307] Num frames 7100... -[2024-09-01 10:31:31,118][00307] Num frames 7200... -[2024-09-01 10:31:31,310][00307] Num frames 7300... -[2024-09-01 10:31:31,558][00307] Avg episode rewards: #0: 29.657, true rewards: #0: 12.323 -[2024-09-01 10:31:31,560][00307] Avg episode reward: 29.657, avg true_objective: 12.323 -[2024-09-01 10:31:31,586][00307] Num frames 7400... -[2024-09-01 10:31:31,781][00307] Num frames 7500... -[2024-09-01 10:31:31,978][00307] Num frames 7600... -[2024-09-01 10:31:32,173][00307] Num frames 7700... -[2024-09-01 10:31:32,371][00307] Num frames 7800... -[2024-09-01 10:31:32,567][00307] Num frames 7900... -[2024-09-01 10:31:32,768][00307] Num frames 8000... -[2024-09-01 10:31:32,958][00307] Num frames 8100... -[2024-09-01 10:31:33,166][00307] Num frames 8200... -[2024-09-01 10:31:33,368][00307] Num frames 8300... -[2024-09-01 10:31:33,568][00307] Num frames 8400... -[2024-09-01 10:31:33,771][00307] Num frames 8500... -[2024-09-01 10:31:33,962][00307] Num frames 8600... -[2024-09-01 10:31:34,160][00307] Num frames 8700... -[2024-09-01 10:31:34,365][00307] Num frames 8800... -[2024-09-01 10:31:34,586][00307] Num frames 8900... -[2024-09-01 10:31:34,791][00307] Num frames 9000... -[2024-09-01 10:31:34,982][00307] Num frames 9100... -[2024-09-01 10:31:35,177][00307] Num frames 9200... -[2024-09-01 10:31:35,371][00307] Num frames 9300... -[2024-09-01 10:31:35,578][00307] Num frames 9400... -[2024-09-01 10:31:35,829][00307] Avg episode rewards: #0: 33.848, true rewards: #0: 13.563 -[2024-09-01 10:31:35,831][00307] Avg episode reward: 33.848, avg true_objective: 13.563 -[2024-09-01 10:31:35,848][00307] Num frames 9500... -[2024-09-01 10:31:36,035][00307] Num frames 9600... -[2024-09-01 10:31:36,226][00307] Num frames 9700... -[2024-09-01 10:31:36,418][00307] Num frames 9800... -[2024-09-01 10:31:36,617][00307] Num frames 9900... -[2024-09-01 10:31:36,808][00307] Num frames 10000... -[2024-09-01 10:31:36,999][00307] Num frames 10100... -[2024-09-01 10:31:37,193][00307] Num frames 10200... -[2024-09-01 10:31:37,369][00307] Avg episode rewards: #0: 31.702, true rewards: #0: 12.827 -[2024-09-01 10:31:37,371][00307] Avg episode reward: 31.702, avg true_objective: 12.827 -[2024-09-01 10:31:37,455][00307] Num frames 10300... -[2024-09-01 10:31:37,667][00307] Num frames 10400... -[2024-09-01 10:31:37,872][00307] Num frames 10500... -[2024-09-01 10:31:38,062][00307] Num frames 10600... -[2024-09-01 10:31:38,266][00307] Num frames 10700... -[2024-09-01 10:31:38,464][00307] Num frames 10800... -[2024-09-01 10:31:38,656][00307] Num frames 10900... -[2024-09-01 10:31:38,863][00307] Num frames 11000... -[2024-09-01 10:31:39,140][00307] Num frames 11100... -[2024-09-01 10:31:39,435][00307] Num frames 11200... -[2024-09-01 10:31:39,699][00307] Num frames 11300... -[2024-09-01 10:31:39,977][00307] Num frames 11400... -[2024-09-01 10:31:40,238][00307] Num frames 11500... -[2024-09-01 10:31:40,507][00307] Num frames 11600... -[2024-09-01 10:31:40,790][00307] Num frames 11700... -[2024-09-01 10:31:41,080][00307] Num frames 11800... -[2024-09-01 10:31:41,363][00307] Num frames 11900... -[2024-09-01 10:31:41,651][00307] Num frames 12000... -[2024-09-01 10:31:41,899][00307] Num frames 12100... -[2024-09-01 10:31:42,116][00307] Num frames 12200... -[2024-09-01 10:31:42,320][00307] Num frames 12300... -[2024-09-01 10:31:42,506][00307] Avg episode rewards: #0: 34.846, true rewards: #0: 13.736 -[2024-09-01 10:31:42,509][00307] Avg episode reward: 34.846, avg true_objective: 13.736 -[2024-09-01 10:31:42,589][00307] Num frames 12400... -[2024-09-01 10:31:42,793][00307] Num frames 12500... -[2024-09-01 10:31:43,015][00307] Num frames 12600... -[2024-09-01 10:31:43,222][00307] Num frames 12700... -[2024-09-01 10:31:43,423][00307] Num frames 12800... -[2024-09-01 10:31:43,633][00307] Num frames 12900... -[2024-09-01 10:31:43,834][00307] Num frames 13000... -[2024-09-01 10:31:44,050][00307] Num frames 13100... -[2024-09-01 10:31:44,256][00307] Num frames 13200... -[2024-09-01 10:31:44,456][00307] Num frames 13300... -[2024-09-01 10:31:44,657][00307] Num frames 13400... -[2024-09-01 10:31:44,866][00307] Num frames 13500... -[2024-09-01 10:31:45,088][00307] Avg episode rewards: #0: 34.183, true rewards: #0: 13.583 -[2024-09-01 10:31:45,091][00307] Avg episode reward: 34.183, avg true_objective: 13.583 -[2024-09-01 10:33:19,526][00307] Replay video saved to /content/train_dir/default_experiment/replay.mp4! -[2024-09-01 10:33:24,356][00307] The model has been pushed to https://huggingface.co/jarski/rl_course_vizdoom_health_gathering_supreme -[2024-09-01 10:33:26,056][00307] Environment doom_basic already registered, overwriting... -[2024-09-01 10:33:26,058][00307] Environment doom_two_colors_easy already registered, overwriting... -[2024-09-01 10:33:26,061][00307] Environment doom_two_colors_hard already registered, overwriting... -[2024-09-01 10:33:26,066][00307] Environment doom_dm already registered, overwriting... -[2024-09-01 10:33:26,069][00307] Environment doom_dwango5 already registered, overwriting... -[2024-09-01 10:33:26,074][00307] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-09-01 10:33:26,075][00307] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-09-01 10:33:26,078][00307] Environment doom_my_way_home already registered, overwriting... -[2024-09-01 10:33:26,079][00307] Environment doom_deadly_corridor already registered, overwriting... -[2024-09-01 10:33:26,083][00307] Environment doom_defend_the_center already registered, overwriting... -[2024-09-01 10:33:26,084][00307] Environment doom_defend_the_line already registered, overwriting... -[2024-09-01 10:33:26,086][00307] Environment doom_health_gathering already registered, overwriting... -[2024-09-01 10:33:26,087][00307] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-09-01 10:33:26,088][00307] Environment doom_battle already registered, overwriting... -[2024-09-01 10:33:26,091][00307] Environment doom_battle2 already registered, overwriting... -[2024-09-01 10:33:26,092][00307] Environment doom_duel_bots already registered, overwriting... -[2024-09-01 10:33:26,095][00307] Environment doom_deathmatch_bots already registered, overwriting... -[2024-09-01 10:33:26,096][00307] Environment doom_duel already registered, overwriting... -[2024-09-01 10:33:26,097][00307] Environment doom_deathmatch_full already registered, overwriting... -[2024-09-01 10:33:26,101][00307] Environment doom_benchmark already registered, overwriting... -[2024-09-01 10:33:26,103][00307] register_encoder_factory: -[2024-09-01 10:33:26,141][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 10:33:26,154][00307] Experiment dir /content/train_dir/default_experiment already exists! -[2024-09-01 10:33:26,156][00307] Resuming existing experiment from /content/train_dir/default_experiment... -[2024-09-01 10:33:26,159][00307] Weights and Biases integration disabled -[2024-09-01 10:33:26,170][00307] Environment var CUDA_VISIBLE_DEVICES is - -[2024-09-01 10:33:31,713][00307] Starting experiment with the following configuration: +[2024-09-01 16:06:07,851][25505] Using optimizer +[2024-09-01 16:06:07,853][25505] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000979_4009984.pth... +[2024-09-01 16:06:07,924][25505] Loading model from checkpoint +[2024-09-01 16:06:07,984][25505] Loaded experiment state at self.train_step=979, self.env_steps=4009984 +[2024-09-01 16:06:07,985][25505] Initialized policy 0 weights for model version 979 +[2024-09-01 16:06:07,990][25519] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 16:06:07,994][25505] LearnerWorker_p0 finished initialization! +[2024-09-01 16:06:07,993][25519] RunningMeanStd input shape: (1,) +[2024-09-01 16:06:08,001][00194] Heartbeat connected on LearnerWorker_p0 +[2024-09-01 16:06:08,026][25519] ConvEncoder: input_channels=3 +[2024-09-01 16:06:08,238][25519] Conv encoder output size: 512 +[2024-09-01 16:06:08,238][25519] Policy head output size: 512 +[2024-09-01 16:06:08,271][00194] Inference worker 0-0 is ready! +[2024-09-01 16:06:08,275][00194] All inference workers are ready! Signal rollout workers to start! +[2024-09-01 16:06:08,469][25522] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,472][25520] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,474][25526] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,480][25521] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,486][25525] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,477][25524] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,490][25518] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:08,492][25523] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:06:10,166][25526] Decorrelating experience for 0 frames... +[2024-09-01 16:06:10,171][25520] Decorrelating experience for 0 frames... +[2024-09-01 16:06:10,175][25522] Decorrelating experience for 0 frames... +[2024-09-01 16:06:10,526][25521] Decorrelating experience for 0 frames... +[2024-09-01 16:06:10,549][25525] Decorrelating experience for 0 frames... +[2024-09-01 16:06:10,554][25518] Decorrelating experience for 0 frames... +[2024-09-01 16:06:10,553][25523] Decorrelating experience for 0 frames... +[2024-09-01 16:06:11,487][25521] Decorrelating experience for 32 frames... +[2024-09-01 16:06:11,492][25525] Decorrelating experience for 32 frames... +[2024-09-01 16:06:11,731][25526] Decorrelating experience for 32 frames... +[2024-09-01 16:06:11,806][25524] Decorrelating experience for 0 frames... +[2024-09-01 16:06:11,986][00194] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4009984. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:06:12,433][25520] Decorrelating experience for 32 frames... +[2024-09-01 16:06:12,518][25522] Decorrelating experience for 32 frames... +[2024-09-01 16:06:12,869][25521] Decorrelating experience for 64 frames... +[2024-09-01 16:06:13,105][25523] Decorrelating experience for 32 frames... +[2024-09-01 16:06:13,232][25518] Decorrelating experience for 32 frames... +[2024-09-01 16:06:13,808][25524] Decorrelating experience for 32 frames... +[2024-09-01 16:06:14,045][25526] Decorrelating experience for 64 frames... +[2024-09-01 16:06:14,888][25521] Decorrelating experience for 96 frames... +[2024-09-01 16:06:14,957][25520] Decorrelating experience for 64 frames... +[2024-09-01 16:06:15,259][25518] Decorrelating experience for 64 frames... +[2024-09-01 16:06:16,246][25522] Decorrelating experience for 64 frames... +[2024-09-01 16:06:16,688][25524] Decorrelating experience for 64 frames... +[2024-09-01 16:06:16,988][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4009984. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:06:16,991][00194] Avg episode reward: [(0, '0.320')] +[2024-09-01 16:06:17,033][25526] Decorrelating experience for 96 frames... +[2024-09-01 16:06:17,742][25525] Decorrelating experience for 64 frames... +[2024-09-01 16:06:19,759][25518] Decorrelating experience for 96 frames... +[2024-09-01 16:06:20,199][25522] Decorrelating experience for 96 frames... +[2024-09-01 16:06:20,358][25520] Decorrelating experience for 96 frames... +[2024-09-01 16:06:20,698][25524] Decorrelating experience for 96 frames... +[2024-09-01 16:06:21,988][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4009984. Throughput: 0: 66.6. Samples: 666. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:06:21,992][00194] Avg episode reward: [(0, '0.320')] +[2024-09-01 16:06:22,087][25525] Decorrelating experience for 96 frames... +[2024-09-01 16:06:22,583][25523] Decorrelating experience for 64 frames... +[2024-09-01 16:06:23,851][25523] Decorrelating experience for 96 frames... +[2024-09-01 16:06:26,012][25505] Signal inference workers to stop experience collection... +[2024-09-01 16:06:26,059][25519] InferenceWorker_p0-w0: stopping experience collection +[2024-09-01 16:06:26,986][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4009984. Throughput: 0: 175.6. Samples: 2634. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:06:26,993][00194] Avg episode reward: [(0, '2.320')] +[2024-09-01 16:06:27,908][25505] Signal inference workers to resume experience collection... +[2024-09-01 16:06:27,910][25505] Stopping Batcher_0... +[2024-09-01 16:06:27,913][25505] Loop batcher_evt_loop terminating... +[2024-09-01 16:06:27,921][00194] Component Batcher_0 stopped! +[2024-09-01 16:06:27,947][25519] Weights refcount: 2 0 +[2024-09-01 16:06:27,950][25519] Stopping InferenceWorker_p0-w0... +[2024-09-01 16:06:27,951][25519] Loop inference_proc0-0_evt_loop terminating... +[2024-09-01 16:06:27,950][00194] Component InferenceWorker_p0-w0 stopped! +[2024-09-01 16:06:28,400][25523] Stopping RolloutWorker_w4... +[2024-09-01 16:06:28,400][00194] Component RolloutWorker_w4 stopped! +[2024-09-01 16:06:28,402][25523] Loop rollout_proc4_evt_loop terminating... +[2024-09-01 16:06:28,415][25521] Stopping RolloutWorker_w2... +[2024-09-01 16:06:28,415][00194] Component RolloutWorker_w2 stopped! +[2024-09-01 16:06:28,418][25521] Loop rollout_proc2_evt_loop terminating... +[2024-09-01 16:06:28,431][25525] Stopping RolloutWorker_w6... +[2024-09-01 16:06:28,431][00194] Component RolloutWorker_w6 stopped! +[2024-09-01 16:06:28,439][25525] Loop rollout_proc6_evt_loop terminating... +[2024-09-01 16:06:28,465][25520] Stopping RolloutWorker_w1... +[2024-09-01 16:06:28,465][00194] Component RolloutWorker_w1 stopped! +[2024-09-01 16:06:28,466][25520] Loop rollout_proc1_evt_loop terminating... +[2024-09-01 16:06:28,493][25522] Stopping RolloutWorker_w3... +[2024-09-01 16:06:28,493][00194] Component RolloutWorker_w3 stopped! +[2024-09-01 16:06:28,493][25522] Loop rollout_proc3_evt_loop terminating... +[2024-09-01 16:06:28,509][25526] Stopping RolloutWorker_w7... +[2024-09-01 16:06:28,510][00194] Component RolloutWorker_w7 stopped! +[2024-09-01 16:06:28,517][00194] Component RolloutWorker_w5 stopped! +[2024-09-01 16:06:28,523][25524] Stopping RolloutWorker_w5... +[2024-09-01 16:06:28,510][25526] Loop rollout_proc7_evt_loop terminating... +[2024-09-01 16:06:28,524][25524] Loop rollout_proc5_evt_loop terminating... +[2024-09-01 16:06:28,569][25518] Stopping RolloutWorker_w0... +[2024-09-01 16:06:28,569][00194] Component RolloutWorker_w0 stopped! +[2024-09-01 16:06:28,578][25518] Loop rollout_proc0_evt_loop terminating... +[2024-09-01 16:06:33,646][25505] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000981_4018176.pth... +[2024-09-01 16:06:33,725][25505] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000974_3989504.pth +[2024-09-01 16:06:33,737][25505] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000981_4018176.pth... +[2024-09-01 16:06:33,878][00194] Component LearnerWorker_p0 stopped! +[2024-09-01 16:06:33,885][00194] Waiting for process learner_proc0 to stop... +[2024-09-01 16:06:33,890][25505] Stopping LearnerWorker_p0... +[2024-09-01 16:06:33,891][25505] Loop learner_proc0_evt_loop terminating... +[2024-09-01 16:06:34,540][00194] Waiting for process inference_proc0-0 to join... +[2024-09-01 16:06:34,545][00194] Waiting for process rollout_proc0 to join... +[2024-09-01 16:06:34,550][00194] Waiting for process rollout_proc1 to join... +[2024-09-01 16:06:34,556][00194] Waiting for process rollout_proc2 to join... +[2024-09-01 16:06:34,560][00194] Waiting for process rollout_proc3 to join... +[2024-09-01 16:06:34,566][00194] Waiting for process rollout_proc4 to join... +[2024-09-01 16:06:34,570][00194] Waiting for process rollout_proc5 to join... +[2024-09-01 16:06:34,574][00194] Waiting for process rollout_proc6 to join... +[2024-09-01 16:06:34,580][00194] Waiting for process rollout_proc7 to join... +[2024-09-01 16:06:34,583][00194] Batcher 0 profile tree view: +batching: 0.0506, releasing_batches: 0.0020 +[2024-09-01 16:06:34,586][00194] InferenceWorker_p0-w0 profile tree view: +update_model: 0.0646 +wait_policy: 0.0001 + wait_policy_total: 9.7355 +one_step: 0.0318 + handle_policy_step: 7.4827 + deserialize: 0.2000, stack: 0.0383, obs_to_device_normalize: 1.1280, forward: 5.5730, send_messages: 0.2052 + prepare_outputs: 0.1651 + to_cpu: 0.0130 +[2024-09-01 16:06:34,590][00194] Learner 0 profile tree view: +misc: 0.0000, prepare_batch: 4.1518 +train: 6.0339 + epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0002, kl_divergence: 0.0007, after_optimizer: 0.0047 + calculate_losses: 2.2055 + losses_init: 0.0000, forward_head: 1.9855, bptt_initial: 0.0043, tail: 0.0103, advantages_returns: 0.0010, losses: 0.0028 + bptt: 0.2010 + bptt_forward_core: 0.1998 + update: 3.8215 + clip: 0.0086 +[2024-09-01 16:06:34,592][00194] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0010, enqueue_policy_requests: 0.1724, env_step: 2.7167, overhead: 0.0721, complete_rollouts: 0.0140 +save_policy_outputs: 0.1292 + split_output_tensors: 0.0193 +[2024-09-01 16:06:34,595][00194] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.0038, enqueue_policy_requests: 0.6344, env_step: 4.8570, overhead: 0.1751, complete_rollouts: 0.0165 +save_policy_outputs: 0.2269 + split_output_tensors: 0.0810 +[2024-09-01 16:06:34,599][00194] Loop Runner_EvtLoop terminating... +[2024-09-01 16:06:34,603][00194] Runner profile tree view: +main_loop: 48.6341 +[2024-09-01 16:06:34,605][00194] Collected {0: 4018176}, FPS: 168.4 +[2024-09-01 16:06:48,086][00194] Environment doom_basic already registered, overwriting... +[2024-09-01 16:06:48,089][00194] Environment doom_two_colors_easy already registered, overwriting... +[2024-09-01 16:06:48,092][00194] Environment doom_two_colors_hard already registered, overwriting... +[2024-09-01 16:06:48,097][00194] Environment doom_dm already registered, overwriting... +[2024-09-01 16:06:48,100][00194] Environment doom_dwango5 already registered, overwriting... +[2024-09-01 16:06:48,101][00194] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-09-01 16:06:48,103][00194] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-09-01 16:06:48,104][00194] Environment doom_my_way_home already registered, overwriting... +[2024-09-01 16:06:48,106][00194] Environment doom_deadly_corridor already registered, overwriting... +[2024-09-01 16:06:48,107][00194] Environment doom_defend_the_center already registered, overwriting... +[2024-09-01 16:06:48,109][00194] Environment doom_defend_the_line already registered, overwriting... +[2024-09-01 16:06:48,110][00194] Environment doom_health_gathering already registered, overwriting... +[2024-09-01 16:06:48,112][00194] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-09-01 16:06:48,113][00194] Environment doom_battle already registered, overwriting... +[2024-09-01 16:06:48,115][00194] Environment doom_battle2 already registered, overwriting... +[2024-09-01 16:06:48,116][00194] Environment doom_duel_bots already registered, overwriting... +[2024-09-01 16:06:48,117][00194] Environment doom_deathmatch_bots already registered, overwriting... +[2024-09-01 16:06:48,119][00194] Environment doom_duel already registered, overwriting... +[2024-09-01 16:06:48,121][00194] Environment doom_deathmatch_full already registered, overwriting... +[2024-09-01 16:06:48,122][00194] Environment doom_benchmark already registered, overwriting... +[2024-09-01 16:06:48,124][00194] register_encoder_factory: +[2024-09-01 16:06:48,154][00194] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-01 16:06:48,160][00194] Overriding arg 'train_for_env_steps' with value 6000000 passed from command line +[2024-09-01 16:06:48,167][00194] Experiment dir /content/train_dir/default_experiment already exists! +[2024-09-01 16:06:48,171][00194] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-09-01 16:06:48,172][00194] Weights and Biases integration disabled +[2024-09-01 16:06:48,177][00194] Environment var CUDA_VISIBLE_DEVICES is +[2024-09-01 16:06:50,270][00194] Starting experiment with the following configuration: help=False algo=APPO env=doom_health_gathering_supreme @@ -7838,7 +2862,7 @@ stats_avg=100 summaries_use_frameskip=True heartbeat_interval=20 heartbeat_reporting_interval=600 -train_for_env_steps=12000000 +train_for_env_steps=6000000 train_for_seconds=10000000000 save_every_sec=120 keep_checkpoints=2 @@ -7898,49 +2922,49 @@ res_h=72 wide_aspect_ratio=False eval_env_frameskip=1 fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --device=cpu --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'device': 'cpu', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} git_hash=unknown git_repo_name=not a git repository -[2024-09-01 10:33:31,723][00307] Saving configuration to /content/train_dir/default_experiment/config.json... -[2024-09-01 10:33:31,729][00307] Rollout worker 0 uses device cpu -[2024-09-01 10:33:31,731][00307] Rollout worker 1 uses device cpu -[2024-09-01 10:33:31,738][00307] Rollout worker 2 uses device cpu -[2024-09-01 10:33:31,741][00307] Rollout worker 3 uses device cpu -[2024-09-01 10:33:31,746][00307] Rollout worker 4 uses device cpu -[2024-09-01 10:33:31,750][00307] Rollout worker 5 uses device cpu -[2024-09-01 10:33:31,752][00307] Rollout worker 6 uses device cpu -[2024-09-01 10:33:31,755][00307] Rollout worker 7 uses device cpu -[2024-09-01 10:33:31,963][00307] InferenceWorker_p0-w0: min num requests: 2 -[2024-09-01 10:33:32,035][00307] Starting all processes... -[2024-09-01 10:33:32,043][00307] Starting process learner_proc0 -[2024-09-01 10:33:32,128][00307] Starting all processes... -[2024-09-01 10:33:32,201][00307] Starting process inference_proc0-0 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc0 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc1 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc2 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc3 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc4 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc5 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc6 -[2024-09-01 10:33:32,209][00307] Starting process rollout_proc7 -[2024-09-01 10:33:50,188][65187] Worker 0 uses CPU cores [0] -[2024-09-01 10:33:50,798][65174] Starting seed is not provided -[2024-09-01 10:33:50,798][65174] Initializing actor-critic model on device cpu -[2024-09-01 10:33:50,799][65174] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 10:33:50,802][65174] RunningMeanStd input shape: (1,) -[2024-09-01 10:33:50,882][65174] ConvEncoder: input_channels=3 -[2024-09-01 10:33:50,946][65192] Worker 4 uses CPU cores [0] -[2024-09-01 10:33:50,959][65191] Worker 3 uses CPU cores [1] -[2024-09-01 10:33:50,978][65189] Worker 1 uses CPU cores [1] -[2024-09-01 10:33:51,023][65195] Worker 7 uses CPU cores [1] -[2024-09-01 10:33:51,154][65194] Worker 5 uses CPU cores [1] -[2024-09-01 10:33:51,472][65190] Worker 2 uses CPU cores [0] -[2024-09-01 10:33:51,492][65193] Worker 6 uses CPU cores [0] -[2024-09-01 10:33:51,611][65174] Conv encoder output size: 512 -[2024-09-01 10:33:51,612][65174] Policy head output size: 512 -[2024-09-01 10:33:51,668][65174] Created Actor Critic model with architecture: -[2024-09-01 10:33:51,676][65174] ActorCriticSharedWeights( +[2024-09-01 16:06:50,273][00194] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-09-01 16:06:50,277][00194] Rollout worker 0 uses device cpu +[2024-09-01 16:06:50,279][00194] Rollout worker 1 uses device cpu +[2024-09-01 16:06:50,281][00194] Rollout worker 2 uses device cpu +[2024-09-01 16:06:50,283][00194] Rollout worker 3 uses device cpu +[2024-09-01 16:06:50,284][00194] Rollout worker 4 uses device cpu +[2024-09-01 16:06:50,286][00194] Rollout worker 5 uses device cpu +[2024-09-01 16:06:50,287][00194] Rollout worker 6 uses device cpu +[2024-09-01 16:06:50,288][00194] Rollout worker 7 uses device cpu +[2024-09-01 16:06:50,458][00194] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-01 16:06:50,500][00194] Starting all processes... +[2024-09-01 16:06:50,502][00194] Starting process learner_proc0 +[2024-09-01 16:06:50,557][00194] Starting all processes... +[2024-09-01 16:06:50,565][00194] Starting process inference_proc0-0 +[2024-09-01 16:06:50,566][00194] Starting process rollout_proc0 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc1 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc2 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc3 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc4 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc5 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc6 +[2024-09-01 16:06:50,568][00194] Starting process rollout_proc7 +[2024-09-01 16:07:05,585][26021] Worker 5 uses CPU cores [1] +[2024-09-01 16:07:05,609][26019] Worker 3 uses CPU cores [1] +[2024-09-01 16:07:05,647][26016] Worker 0 uses CPU cores [0] +[2024-09-01 16:07:05,921][26018] Worker 1 uses CPU cores [1] +[2024-09-01 16:07:05,924][26020] Worker 4 uses CPU cores [0] +[2024-09-01 16:07:05,960][26002] Starting seed is not provided +[2024-09-01 16:07:05,961][26002] Initializing actor-critic model on device cpu +[2024-09-01 16:07:05,961][26002] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 16:07:05,963][26002] RunningMeanStd input shape: (1,) +[2024-09-01 16:07:06,027][26022] Worker 6 uses CPU cores [0] +[2024-09-01 16:07:06,034][26002] ConvEncoder: input_channels=3 +[2024-09-01 16:07:06,109][26023] Worker 7 uses CPU cores [1] +[2024-09-01 16:07:06,119][26017] Worker 2 uses CPU cores [0] +[2024-09-01 16:07:06,249][26002] Conv encoder output size: 512 +[2024-09-01 16:07:06,250][26002] Policy head output size: 512 +[2024-09-01 16:07:06,267][26002] Created Actor Critic model with architecture: +[2024-09-01 16:07:06,267][26002] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -7981,303 +3005,1314 @@ git_repo_name=not a git repository (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-09-01 10:33:51,963][00307] Heartbeat connected on InferenceWorker_p0-w0 -[2024-09-01 10:33:51,978][00307] Heartbeat connected on RolloutWorker_w0 -[2024-09-01 10:33:51,991][00307] Heartbeat connected on RolloutWorker_w1 -[2024-09-01 10:33:52,002][00307] Heartbeat connected on RolloutWorker_w2 -[2024-09-01 10:33:52,007][00307] Heartbeat connected on RolloutWorker_w3 -[2024-09-01 10:33:52,016][00307] Heartbeat connected on RolloutWorker_w4 -[2024-09-01 10:33:52,021][00307] Heartbeat connected on RolloutWorker_w5 -[2024-09-01 10:33:52,029][00307] Heartbeat connected on RolloutWorker_w6 -[2024-09-01 10:33:52,035][00307] Heartbeat connected on RolloutWorker_w7 -[2024-09-01 10:33:52,927][00307] Heartbeat connected on Batcher_0 -[2024-09-01 10:33:52,949][65174] Using optimizer -[2024-09-01 10:33:52,951][65174] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002932_12009472.pth... -[2024-09-01 10:33:53,010][65174] Loading model from checkpoint -[2024-09-01 10:33:53,058][65174] Loaded experiment state at self.train_step=2932, self.env_steps=12009472 -[2024-09-01 10:33:53,058][65174] Initialized policy 0 weights for model version 2932 -[2024-09-01 10:33:53,064][65188] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 10:33:53,066][65174] LearnerWorker_p0 finished initialization! -[2024-09-01 10:33:53,067][00307] Heartbeat connected on LearnerWorker_p0 -[2024-09-01 10:33:53,072][65188] RunningMeanStd input shape: (1,) -[2024-09-01 10:33:53,098][65188] ConvEncoder: input_channels=3 -[2024-09-01 10:33:53,326][65188] Conv encoder output size: 512 -[2024-09-01 10:33:53,327][65188] Policy head output size: 512 -[2024-09-01 10:33:53,357][00307] Inference worker 0-0 is ready! -[2024-09-01 10:33:53,359][00307] All inference workers are ready! Signal rollout workers to start! -[2024-09-01 10:33:53,526][65187] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,545][65192] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,548][65193] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,555][65190] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,563][65191] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,561][65195] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,560][65189] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:53,566][65194] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-01 10:33:55,770][65187] Decorrelating experience for 0 frames... -[2024-09-01 10:33:55,816][65192] Decorrelating experience for 0 frames... -[2024-09-01 10:33:55,835][65193] Decorrelating experience for 0 frames... -[2024-09-01 10:33:55,830][65189] Decorrelating experience for 0 frames... -[2024-09-01 10:33:55,846][65195] Decorrelating experience for 0 frames... -[2024-09-01 10:33:55,853][65191] Decorrelating experience for 0 frames... -[2024-09-01 10:33:55,860][65190] Decorrelating experience for 0 frames... -[2024-09-01 10:33:56,171][00307] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 12009472. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 10:33:57,632][65194] Decorrelating experience for 0 frames... -[2024-09-01 10:33:57,653][65191] Decorrelating experience for 32 frames... -[2024-09-01 10:33:57,667][65195] Decorrelating experience for 32 frames... -[2024-09-01 10:33:58,359][65187] Decorrelating experience for 32 frames... -[2024-09-01 10:33:58,371][65192] Decorrelating experience for 32 frames... -[2024-09-01 10:33:58,468][65193] Decorrelating experience for 32 frames... -[2024-09-01 10:33:59,837][65189] Decorrelating experience for 32 frames... -[2024-09-01 10:33:59,889][65194] Decorrelating experience for 32 frames... -[2024-09-01 10:34:00,556][65187] Decorrelating experience for 64 frames... -[2024-09-01 10:34:00,558][65192] Decorrelating experience for 64 frames... -[2024-09-01 10:34:00,673][65195] Decorrelating experience for 64 frames... -[2024-09-01 10:34:00,692][65191] Decorrelating experience for 64 frames... -[2024-09-01 10:34:01,173][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12009472. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 10:34:02,035][65194] Decorrelating experience for 64 frames... -[2024-09-01 10:34:02,409][65191] Decorrelating experience for 96 frames... -[2024-09-01 10:34:02,791][65193] Decorrelating experience for 64 frames... -[2024-09-01 10:34:02,840][65190] Decorrelating experience for 32 frames... -[2024-09-01 10:34:02,997][65192] Decorrelating experience for 96 frames... -[2024-09-01 10:34:04,449][65189] Decorrelating experience for 64 frames... -[2024-09-01 10:34:04,647][65194] Decorrelating experience for 96 frames... -[2024-09-01 10:34:05,380][65187] Decorrelating experience for 96 frames... -[2024-09-01 10:34:05,540][65193] Decorrelating experience for 96 frames... -[2024-09-01 10:34:06,170][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12009472. Throughput: 0: 61.2. Samples: 612. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 10:34:06,177][00307] Avg episode reward: [(0, '3.492')] -[2024-09-01 10:34:07,094][65190] Decorrelating experience for 64 frames... -[2024-09-01 10:34:07,285][65195] Decorrelating experience for 96 frames... -[2024-09-01 10:34:07,295][65189] Decorrelating experience for 96 frames... -[2024-09-01 10:34:10,760][65190] Decorrelating experience for 96 frames... -[2024-09-01 10:34:11,045][65174] Signal inference workers to stop experience collection... -[2024-09-01 10:34:11,148][65188] InferenceWorker_p0-w0: stopping experience collection -[2024-09-01 10:34:11,170][00307] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 12009472. Throughput: 0: 105.9. Samples: 1588. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-01 10:34:11,179][00307] Avg episode reward: [(0, '4.038')] -[2024-09-01 10:34:13,282][65174] Signal inference workers to resume experience collection... -[2024-09-01 10:34:13,284][65174] Stopping Batcher_0... -[2024-09-01 10:34:13,284][65174] Loop batcher_evt_loop terminating... -[2024-09-01 10:34:13,312][00307] Component Batcher_0 stopped! -[2024-09-01 10:34:13,351][65188] Weights refcount: 2 0 -[2024-09-01 10:34:13,361][00307] Component InferenceWorker_p0-w0 stopped! -[2024-09-01 10:34:13,365][65188] Stopping InferenceWorker_p0-w0... -[2024-09-01 10:34:13,370][65188] Loop inference_proc0-0_evt_loop terminating... -[2024-09-01 10:34:14,036][00307] Component RolloutWorker_w5 stopped! -[2024-09-01 10:34:14,036][65194] Stopping RolloutWorker_w5... -[2024-09-01 10:34:14,050][65194] Loop rollout_proc5_evt_loop terminating... -[2024-09-01 10:34:14,067][00307] Component RolloutWorker_w7 stopped! -[2024-09-01 10:34:14,072][00307] Component RolloutWorker_w3 stopped! -[2024-09-01 10:34:14,080][65191] Stopping RolloutWorker_w3... -[2024-09-01 10:34:14,081][65191] Loop rollout_proc3_evt_loop terminating... -[2024-09-01 10:34:14,067][65195] Stopping RolloutWorker_w7... -[2024-09-01 10:34:14,089][65195] Loop rollout_proc7_evt_loop terminating... -[2024-09-01 10:34:14,093][00307] Component RolloutWorker_w2 stopped! -[2024-09-01 10:34:14,099][65190] Stopping RolloutWorker_w2... -[2024-09-01 10:34:14,101][65190] Loop rollout_proc2_evt_loop terminating... -[2024-09-01 10:34:14,126][00307] Component RolloutWorker_w0 stopped! -[2024-09-01 10:34:14,130][65187] Stopping RolloutWorker_w0... -[2024-09-01 10:34:14,136][65187] Loop rollout_proc0_evt_loop terminating... -[2024-09-01 10:34:14,192][65189] Stopping RolloutWorker_w1... -[2024-09-01 10:34:14,192][00307] Component RolloutWorker_w1 stopped! -[2024-09-01 10:34:14,208][65189] Loop rollout_proc1_evt_loop terminating... -[2024-09-01 10:34:14,239][00307] Component RolloutWorker_w4 stopped! -[2024-09-01 10:34:14,247][65192] Stopping RolloutWorker_w4... -[2024-09-01 10:34:14,248][65192] Loop rollout_proc4_evt_loop terminating... -[2024-09-01 10:34:14,263][00307] Component RolloutWorker_w6 stopped! -[2024-09-01 10:34:14,267][65193] Stopping RolloutWorker_w6... -[2024-09-01 10:34:14,267][65193] Loop rollout_proc6_evt_loop terminating... -[2024-09-01 10:34:21,389][65174] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002934_12017664.pth... -[2024-09-01 10:34:21,705][65174] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002917_11948032.pth -[2024-09-01 10:34:21,753][65174] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002934_12017664.pth... -[2024-09-01 10:34:22,098][65174] Stopping LearnerWorker_p0... -[2024-09-01 10:34:22,100][65174] Loop learner_proc0_evt_loop terminating... -[2024-09-01 10:34:22,101][00307] Component LearnerWorker_p0 stopped! -[2024-09-01 10:34:22,120][00307] Waiting for process learner_proc0 to stop... -[2024-09-01 10:34:23,094][00307] Waiting for process inference_proc0-0 to join... -[2024-09-01 10:34:23,107][00307] Waiting for process rollout_proc0 to join... -[2024-09-01 10:34:23,120][00307] Waiting for process rollout_proc1 to join... -[2024-09-01 10:34:23,130][00307] Waiting for process rollout_proc2 to join... -[2024-09-01 10:34:23,143][00307] Waiting for process rollout_proc3 to join... -[2024-09-01 10:34:23,152][00307] Waiting for process rollout_proc4 to join... -[2024-09-01 10:34:23,166][00307] Waiting for process rollout_proc5 to join... -[2024-09-01 10:34:23,178][00307] Waiting for process rollout_proc6 to join... -[2024-09-01 10:34:23,196][00307] Waiting for process rollout_proc7 to join... -[2024-09-01 10:34:23,214][00307] Batcher 0 profile tree view: -batching: 0.0664, releasing_batches: 0.0005 -[2024-09-01 10:34:23,220][00307] InferenceWorker_p0-w0 profile tree view: -update_model: 0.0290 +[2024-09-01 16:07:06,769][26002] Using optimizer +[2024-09-01 16:07:06,771][26002] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000981_4018176.pth... +[2024-09-01 16:07:06,812][26002] Loading model from checkpoint +[2024-09-01 16:07:06,841][26002] Loaded experiment state at self.train_step=981, self.env_steps=4018176 +[2024-09-01 16:07:06,842][26002] Initialized policy 0 weights for model version 981 +[2024-09-01 16:07:06,844][26002] LearnerWorker_p0 finished initialization! +[2024-09-01 16:07:06,849][26015] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 16:07:06,850][26015] RunningMeanStd input shape: (1,) +[2024-09-01 16:07:06,874][26015] ConvEncoder: input_channels=3 +[2024-09-01 16:07:07,027][26015] Conv encoder output size: 512 +[2024-09-01 16:07:07,028][26015] Policy head output size: 512 +[2024-09-01 16:07:07,050][00194] Inference worker 0-0 is ready! +[2024-09-01 16:07:07,052][00194] All inference workers are ready! Signal rollout workers to start! +[2024-09-01 16:07:07,187][26023] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,190][26019] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,193][26021] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,203][26018] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,227][26022] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,224][26016] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,252][26017] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:07,258][26020] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:07:08,177][00194] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4018176. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:07:08,257][26022] Decorrelating experience for 0 frames... +[2024-09-01 16:07:08,273][26020] Decorrelating experience for 0 frames... +[2024-09-01 16:07:09,124][26023] Decorrelating experience for 0 frames... +[2024-09-01 16:07:09,130][26021] Decorrelating experience for 0 frames... +[2024-09-01 16:07:09,129][26019] Decorrelating experience for 0 frames... +[2024-09-01 16:07:09,141][26018] Decorrelating experience for 0 frames... +[2024-09-01 16:07:09,195][26022] Decorrelating experience for 32 frames... +[2024-09-01 16:07:09,216][26020] Decorrelating experience for 32 frames... +[2024-09-01 16:07:10,447][00194] Heartbeat connected on Batcher_0 +[2024-09-01 16:07:10,453][00194] Heartbeat connected on LearnerWorker_p0 +[2024-09-01 16:07:10,493][00194] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-01 16:07:10,542][26023] Decorrelating experience for 32 frames... +[2024-09-01 16:07:10,545][26018] Decorrelating experience for 32 frames... +[2024-09-01 16:07:10,607][26017] Decorrelating experience for 0 frames... +[2024-09-01 16:07:10,649][26016] Decorrelating experience for 0 frames... +[2024-09-01 16:07:10,787][26019] Decorrelating experience for 32 frames... +[2024-09-01 16:07:10,869][26020] Decorrelating experience for 64 frames... +[2024-09-01 16:07:11,744][26021] Decorrelating experience for 32 frames... +[2024-09-01 16:07:11,852][26018] Decorrelating experience for 64 frames... +[2024-09-01 16:07:12,576][26016] Decorrelating experience for 32 frames... +[2024-09-01 16:07:12,593][26017] Decorrelating experience for 32 frames... +[2024-09-01 16:07:12,868][26022] Decorrelating experience for 64 frames... +[2024-09-01 16:07:13,171][26020] Decorrelating experience for 96 frames... +[2024-09-01 16:07:13,178][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4018176. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:07:13,611][26021] Decorrelating experience for 64 frames... +[2024-09-01 16:07:13,699][00194] Heartbeat connected on RolloutWorker_w4 +[2024-09-01 16:07:13,834][26018] Decorrelating experience for 96 frames... +[2024-09-01 16:07:14,414][00194] Heartbeat connected on RolloutWorker_w1 +[2024-09-01 16:07:15,125][26016] Decorrelating experience for 64 frames... +[2024-09-01 16:07:15,224][26023] Decorrelating experience for 64 frames... +[2024-09-01 16:07:17,374][26021] Decorrelating experience for 96 frames... +[2024-09-01 16:07:17,847][00194] Heartbeat connected on RolloutWorker_w5 +[2024-09-01 16:07:18,177][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4018176. Throughput: 0: 40.2. Samples: 402. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:07:18,184][00194] Avg episode reward: [(0, '3.420')] +[2024-09-01 16:07:18,806][26022] Decorrelating experience for 96 frames... +[2024-09-01 16:07:18,892][26017] Decorrelating experience for 64 frames... +[2024-09-01 16:07:19,271][26016] Decorrelating experience for 96 frames... +[2024-09-01 16:07:19,611][00194] Heartbeat connected on RolloutWorker_w6 +[2024-09-01 16:07:20,302][00194] Heartbeat connected on RolloutWorker_w0 +[2024-09-01 16:07:22,819][26019] Decorrelating experience for 64 frames... +[2024-09-01 16:07:23,177][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4018176. Throughput: 0: 108.4. Samples: 1626. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 16:07:23,179][00194] Avg episode reward: [(0, '4.904')] +[2024-09-01 16:07:23,689][26017] Decorrelating experience for 96 frames... +[2024-09-01 16:07:24,318][00194] Heartbeat connected on RolloutWorker_w2 +[2024-09-01 16:07:24,681][26002] Signal inference workers to stop experience collection... +[2024-09-01 16:07:24,723][26015] InferenceWorker_p0-w0: stopping experience collection +[2024-09-01 16:07:25,227][26023] Decorrelating experience for 96 frames... +[2024-09-01 16:07:25,403][00194] Heartbeat connected on RolloutWorker_w7 +[2024-09-01 16:07:25,464][26019] Decorrelating experience for 96 frames... +[2024-09-01 16:07:25,565][00194] Heartbeat connected on RolloutWorker_w3 +[2024-09-01 16:07:25,848][26002] Signal inference workers to resume experience collection... +[2024-09-01 16:07:25,849][26015] InferenceWorker_p0-w0: resuming experience collection +[2024-09-01 16:07:28,177][00194] Fps is (10 sec: 409.6, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 4022272. Throughput: 0: 164.8. Samples: 3296. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-01 16:07:28,185][00194] Avg episode reward: [(0, '4.277')] +[2024-09-01 16:07:33,179][00194] Fps is (10 sec: 819.0, 60 sec: 327.7, 300 sec: 327.7). Total num frames: 4026368. Throughput: 0: 149.4. Samples: 3736. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-01 16:07:33,183][00194] Avg episode reward: [(0, '7.907')] +[2024-09-01 16:07:38,179][00194] Fps is (10 sec: 819.1, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 4030464. Throughput: 0: 148.7. Samples: 4460. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:07:38,188][00194] Avg episode reward: [(0, '8.065')] +[2024-09-01 16:07:43,177][00194] Fps is (10 sec: 819.4, 60 sec: 468.1, 300 sec: 468.1). Total num frames: 4034560. Throughput: 0: 166.7. Samples: 5836. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:07:43,184][00194] Avg episode reward: [(0, '8.912')] +[2024-09-01 16:07:48,177][00194] Fps is (10 sec: 819.3, 60 sec: 512.0, 300 sec: 512.0). Total num frames: 4038656. Throughput: 0: 163.5. Samples: 6540. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:07:48,180][00194] Avg episode reward: [(0, '9.919')] +[2024-09-01 16:07:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 4042752. Throughput: 0: 184.8. Samples: 8314. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:07:53,180][00194] Avg episode reward: [(0, '10.831')] +[2024-09-01 16:07:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 573.4, 300 sec: 573.4). Total num frames: 4046848. Throughput: 0: 203.1. Samples: 9140. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:07:58,183][00194] Avg episode reward: [(0, '11.359')] +[2024-09-01 16:08:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 595.8, 300 sec: 595.8). Total num frames: 4050944. Throughput: 0: 216.0. Samples: 10124. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:08:03,185][00194] Avg episode reward: [(0, '12.138')] +[2024-09-01 16:08:07,361][26015] Updated weights for policy 0, policy_version 991 (0.1120) +[2024-09-01 16:08:08,177][00194] Fps is (10 sec: 1228.8, 60 sec: 682.7, 300 sec: 682.7). Total num frames: 4059136. Throughput: 0: 223.2. Samples: 11668. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:08,183][00194] Avg episode reward: [(0, '12.743')] +[2024-09-01 16:08:13,182][00194] Fps is (10 sec: 1228.2, 60 sec: 750.9, 300 sec: 693.1). Total num frames: 4063232. Throughput: 0: 203.1. Samples: 12436. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:13,193][00194] Avg episode reward: [(0, '13.246')] +[2024-09-01 16:08:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 702.2). Total num frames: 4067328. Throughput: 0: 216.7. Samples: 13488. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:18,185][00194] Avg episode reward: [(0, '13.947')] +[2024-09-01 16:08:23,179][00194] Fps is (10 sec: 819.5, 60 sec: 887.4, 300 sec: 710.0). Total num frames: 4071424. Throughput: 0: 230.4. Samples: 14830. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:23,181][00194] Avg episode reward: [(0, '14.673')] +[2024-09-01 16:08:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 716.8). Total num frames: 4075520. Throughput: 0: 240.9. Samples: 16678. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:28,180][00194] Avg episode reward: [(0, '14.769')] +[2024-09-01 16:08:33,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 722.8). Total num frames: 4079616. Throughput: 0: 229.7. Samples: 16876. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:33,191][00194] Avg episode reward: [(0, '14.831')] +[2024-09-01 16:08:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 728.2). Total num frames: 4083712. Throughput: 0: 228.4. Samples: 18592. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:38,180][00194] Avg episode reward: [(0, '15.019')] +[2024-09-01 16:08:43,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 776.1). Total num frames: 4091904. Throughput: 0: 224.6. Samples: 19246. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:08:43,179][00194] Avg episode reward: [(0, '15.753')] +[2024-09-01 16:08:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 778.2). Total num frames: 4096000. Throughput: 0: 235.2. Samples: 20710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:08:48,181][00194] Avg episode reward: [(0, '15.722')] +[2024-09-01 16:08:53,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 741.2). Total num frames: 4096000. Throughput: 0: 227.3. Samples: 21896. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:08:53,184][00194] Avg episode reward: [(0, '15.941')] +[2024-09-01 16:08:53,307][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001001_4100096.pth... +[2024-09-01 16:08:53,313][26015] Updated weights for policy 0, policy_version 1001 (0.2141) +[2024-09-01 16:08:53,423][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000979_4009984.pth +[2024-09-01 16:08:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 782.0). Total num frames: 4104192. Throughput: 0: 239.6. Samples: 23218. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:08:58,179][00194] Avg episode reward: [(0, '16.570')] +[2024-09-01 16:09:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 783.6). Total num frames: 4108288. Throughput: 0: 236.9. Samples: 24148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:03,180][00194] Avg episode reward: [(0, '17.257')] +[2024-09-01 16:09:08,181][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 785.0). Total num frames: 4112384. Throughput: 0: 230.3. Samples: 25192. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:08,184][00194] Avg episode reward: [(0, '17.191')] +[2024-09-01 16:09:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 786.4). Total num frames: 4116480. Throughput: 0: 222.3. Samples: 26682. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:13,179][00194] Avg episode reward: [(0, '18.269')] +[2024-09-01 16:09:18,177][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 787.7). Total num frames: 4120576. Throughput: 0: 237.4. Samples: 27560. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:18,183][00194] Avg episode reward: [(0, '19.897')] +[2024-09-01 16:09:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 788.9). Total num frames: 4124672. Throughput: 0: 232.3. Samples: 29044. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:23,180][00194] Avg episode reward: [(0, '19.967')] +[2024-09-01 16:09:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 789.9). Total num frames: 4128768. Throughput: 0: 244.5. Samples: 30250. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:28,187][00194] Avg episode reward: [(0, '20.032')] +[2024-09-01 16:09:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 4136960. Throughput: 0: 226.6. Samples: 30908. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:33,180][00194] Avg episode reward: [(0, '21.086')] +[2024-09-01 16:09:36,538][26015] Updated weights for policy 0, policy_version 1011 (0.2612) +[2024-09-01 16:09:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 4141056. Throughput: 0: 235.8. Samples: 32508. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:38,182][00194] Avg episode reward: [(0, '21.683')] +[2024-09-01 16:09:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 4145152. Throughput: 0: 233.7. Samples: 33734. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:09:43,180][00194] Avg episode reward: [(0, '21.870')] +[2024-09-01 16:09:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 4149248. Throughput: 0: 226.2. Samples: 34326. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:09:48,180][00194] Avg episode reward: [(0, '21.780')] +[2024-09-01 16:09:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 4153344. Throughput: 0: 236.4. Samples: 35830. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:53,180][00194] Avg episode reward: [(0, '22.352')] +[2024-09-01 16:09:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 4157440. Throughput: 0: 238.3. Samples: 37406. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:09:58,180][00194] Avg episode reward: [(0, '22.221')] +[2024-09-01 16:10:03,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 819.2). Total num frames: 4161536. Throughput: 0: 230.8. Samples: 37948. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:10:03,197][00194] Avg episode reward: [(0, '22.833')] +[2024-09-01 16:10:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 4165632. Throughput: 0: 233.2. Samples: 39540. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:10:08,186][00194] Avg episode reward: [(0, '22.833')] +[2024-09-01 16:10:13,177][00194] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 841.3). Total num frames: 4173824. Throughput: 0: 233.6. Samples: 40762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:10:13,185][00194] Avg episode reward: [(0, '22.382')] +[2024-09-01 16:10:18,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 840.8). Total num frames: 4177920. Throughput: 0: 238.2. Samples: 41626. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:10:18,183][00194] Avg episode reward: [(0, '22.234')] +[2024-09-01 16:10:22,418][26015] Updated weights for policy 0, policy_version 1021 (0.1004) +[2024-09-01 16:10:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 840.2). Total num frames: 4182016. Throughput: 0: 225.6. Samples: 42662. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:10:23,179][00194] Avg episode reward: [(0, '22.592')] +[2024-09-01 16:10:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 839.7). Total num frames: 4186112. Throughput: 0: 235.5. Samples: 44332. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:10:28,180][00194] Avg episode reward: [(0, '22.806')] +[2024-09-01 16:10:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 839.2). Total num frames: 4190208. Throughput: 0: 241.3. Samples: 45184. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:10:33,180][00194] Avg episode reward: [(0, '23.284')] +[2024-09-01 16:10:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 838.7). Total num frames: 4194304. Throughput: 0: 233.5. Samples: 46338. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:10:38,182][00194] Avg episode reward: [(0, '23.781')] +[2024-09-01 16:10:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 838.3). Total num frames: 4198400. Throughput: 0: 228.7. Samples: 47696. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:10:43,182][00194] Avg episode reward: [(0, '24.124')] +[2024-09-01 16:10:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 856.4). Total num frames: 4206592. Throughput: 0: 237.8. Samples: 48648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:10:48,180][00194] Avg episode reward: [(0, '24.933')] +[2024-09-01 16:10:52,079][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001028_4210688.pth... +[2024-09-01 16:10:52,205][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000981_4018176.pth +[2024-09-01 16:10:53,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 855.6). Total num frames: 4210688. Throughput: 0: 226.8. Samples: 49744. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:10:53,184][00194] Avg episode reward: [(0, '24.570')] +[2024-09-01 16:10:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 854.8). Total num frames: 4214784. Throughput: 0: 224.5. Samples: 50866. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:10:58,180][00194] Avg episode reward: [(0, '23.878')] +[2024-09-01 16:11:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 854.1). Total num frames: 4218880. Throughput: 0: 226.7. Samples: 51828. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:03,180][00194] Avg episode reward: [(0, '24.630')] +[2024-09-01 16:11:05,855][26015] Updated weights for policy 0, policy_version 1031 (0.1467) +[2024-09-01 16:11:08,178][00194] Fps is (10 sec: 819.1, 60 sec: 955.7, 300 sec: 853.3). Total num frames: 4222976. Throughput: 0: 238.5. Samples: 53394. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:08,181][00194] Avg episode reward: [(0, '24.416')] +[2024-09-01 16:11:08,999][26002] Signal inference workers to stop experience collection... (50 times) +[2024-09-01 16:11:09,060][26015] InferenceWorker_p0-w0: stopping experience collection (50 times) +[2024-09-01 16:11:10,186][26002] Signal inference workers to resume experience collection... (50 times) +[2024-09-01 16:11:10,187][26015] InferenceWorker_p0-w0: resuming experience collection (50 times) +[2024-09-01 16:11:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 852.6). Total num frames: 4227072. Throughput: 0: 223.8. Samples: 54402. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:13,184][00194] Avg episode reward: [(0, '23.874')] +[2024-09-01 16:11:18,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 852.0). Total num frames: 4231168. Throughput: 0: 217.6. Samples: 54976. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:18,179][00194] Avg episode reward: [(0, '23.679')] +[2024-09-01 16:11:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 851.3). Total num frames: 4235264. Throughput: 0: 236.1. Samples: 56962. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:11:23,185][00194] Avg episode reward: [(0, '24.322')] +[2024-09-01 16:11:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 850.7). Total num frames: 4239360. Throughput: 0: 229.0. Samples: 58000. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:11:28,184][00194] Avg episode reward: [(0, '24.549')] +[2024-09-01 16:11:33,178][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 850.1). Total num frames: 4243456. Throughput: 0: 220.7. Samples: 58580. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:11:33,189][00194] Avg episode reward: [(0, '24.854')] +[2024-09-01 16:11:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 849.5). Total num frames: 4247552. Throughput: 0: 220.2. Samples: 59654. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:11:38,184][00194] Avg episode reward: [(0, '24.462')] +[2024-09-01 16:11:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 849.0). Total num frames: 4251648. Throughput: 0: 222.4. Samples: 60874. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:43,184][00194] Avg episode reward: [(0, '24.454')] +[2024-09-01 16:11:48,179][00194] Fps is (10 sec: 819.0, 60 sec: 819.2, 300 sec: 848.4). Total num frames: 4255744. Throughput: 0: 214.0. Samples: 61460. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:48,183][00194] Avg episode reward: [(0, '24.499')] +[2024-09-01 16:11:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 847.9). Total num frames: 4259840. Throughput: 0: 204.0. Samples: 62572. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:53,180][00194] Avg episode reward: [(0, '24.757')] +[2024-09-01 16:11:54,518][26015] Updated weights for policy 0, policy_version 1041 (0.1754) +[2024-09-01 16:11:58,177][00194] Fps is (10 sec: 819.4, 60 sec: 819.2, 300 sec: 847.4). Total num frames: 4263936. Throughput: 0: 215.5. Samples: 64100. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:11:58,182][00194] Avg episode reward: [(0, '24.680')] +[2024-09-01 16:12:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 860.9). Total num frames: 4272128. Throughput: 0: 225.3. Samples: 65116. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:12:03,180][00194] Avg episode reward: [(0, '25.100')] +[2024-09-01 16:12:08,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4276224. Throughput: 0: 204.6. Samples: 66168. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:08,182][00194] Avg episode reward: [(0, '25.220')] +[2024-09-01 16:12:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4280320. Throughput: 0: 210.9. Samples: 67492. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:13,184][00194] Avg episode reward: [(0, '25.315')] +[2024-09-01 16:12:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4284416. Throughput: 0: 213.9. Samples: 68206. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:12:18,182][00194] Avg episode reward: [(0, '25.625')] +[2024-09-01 16:12:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4288512. Throughput: 0: 225.8. Samples: 69814. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:12:23,180][00194] Avg episode reward: [(0, '25.851')] +[2024-09-01 16:12:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4292608. Throughput: 0: 222.7. Samples: 70894. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:12:28,185][00194] Avg episode reward: [(0, '25.901')] +[2024-09-01 16:12:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4296704. Throughput: 0: 222.2. Samples: 71458. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:12:33,180][00194] Avg episode reward: [(0, '26.142')] +[2024-09-01 16:12:37,639][26002] Saving new best policy, reward=26.142! +[2024-09-01 16:12:37,655][26015] Updated weights for policy 0, policy_version 1051 (0.1676) +[2024-09-01 16:12:38,179][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4304896. Throughput: 0: 238.1. Samples: 73286. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:38,189][00194] Avg episode reward: [(0, '26.086')] +[2024-09-01 16:12:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4304896. Throughput: 0: 226.9. Samples: 74310. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:43,184][00194] Avg episode reward: [(0, '25.556')] +[2024-09-01 16:12:48,177][00194] Fps is (10 sec: 819.4, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 4313088. Throughput: 0: 218.8. Samples: 74962. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:48,189][00194] Avg episode reward: [(0, '25.942')] +[2024-09-01 16:12:51,983][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001054_4317184.pth... +[2024-09-01 16:12:52,091][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001001_4100096.pth +[2024-09-01 16:12:53,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4317184. Throughput: 0: 228.3. Samples: 76440. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:53,180][00194] Avg episode reward: [(0, '25.123')] +[2024-09-01 16:12:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4321280. Throughput: 0: 232.6. Samples: 77960. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:12:58,183][00194] Avg episode reward: [(0, '25.533')] +[2024-09-01 16:13:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4325376. Throughput: 0: 226.7. Samples: 78406. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:13:03,179][00194] Avg episode reward: [(0, '25.730')] +[2024-09-01 16:13:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4329472. Throughput: 0: 222.8. Samples: 79842. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:13:08,185][00194] Avg episode reward: [(0, '26.175')] +[2024-09-01 16:13:09,985][26002] Saving new best policy, reward=26.175! +[2024-09-01 16:13:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4333568. Throughput: 0: 235.3. Samples: 81482. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:13:13,182][00194] Avg episode reward: [(0, '25.694')] +[2024-09-01 16:13:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4337664. Throughput: 0: 235.0. Samples: 82034. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:13:18,180][00194] Avg episode reward: [(0, '26.236')] +[2024-09-01 16:13:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4341760. Throughput: 0: 218.6. Samples: 83124. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:13:23,180][00194] Avg episode reward: [(0, '26.997')] +[2024-09-01 16:13:24,045][26002] Saving new best policy, reward=26.236! +[2024-09-01 16:13:24,052][26015] Updated weights for policy 0, policy_version 1061 (0.0564) +[2024-09-01 16:13:27,854][26002] Saving new best policy, reward=26.997! +[2024-09-01 16:13:28,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4349952. Throughput: 0: 228.0. Samples: 84570. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:13:28,180][00194] Avg episode reward: [(0, '26.932')] +[2024-09-01 16:13:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4354048. Throughput: 0: 234.2. Samples: 85500. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:13:33,179][00194] Avg episode reward: [(0, '27.021')] +[2024-09-01 16:13:37,194][26002] Saving new best policy, reward=27.021! +[2024-09-01 16:13:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4358144. Throughput: 0: 222.4. Samples: 86446. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:13:38,183][00194] Avg episode reward: [(0, '27.035')] +[2024-09-01 16:13:41,904][26002] Saving new best policy, reward=27.035! +[2024-09-01 16:13:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4362240. Throughput: 0: 221.6. Samples: 87930. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:13:43,184][00194] Avg episode reward: [(0, '26.874')] +[2024-09-01 16:13:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 4366336. Throughput: 0: 225.2. Samples: 88540. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:13:48,179][00194] Avg episode reward: [(0, '26.624')] +[2024-09-01 16:13:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4370432. Throughput: 0: 229.1. Samples: 90150. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:13:53,180][00194] Avg episode reward: [(0, '26.624')] +[2024-09-01 16:13:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4374528. Throughput: 0: 217.7. Samples: 91278. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:13:58,184][00194] Avg episode reward: [(0, '26.628')] +[2024-09-01 16:14:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4378624. Throughput: 0: 223.2. Samples: 92076. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:14:03,186][00194] Avg episode reward: [(0, '25.625')] +[2024-09-01 16:14:07,921][26015] Updated weights for policy 0, policy_version 1071 (0.1072) +[2024-09-01 16:14:08,178][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4386816. Throughput: 0: 233.1. Samples: 93612. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:14:08,185][00194] Avg episode reward: [(0, '25.526')] +[2024-09-01 16:14:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4390912. Throughput: 0: 224.5. Samples: 94674. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:14:13,181][00194] Avg episode reward: [(0, '25.796')] +[2024-09-01 16:14:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4395008. Throughput: 0: 219.4. Samples: 95372. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:14:18,180][00194] Avg episode reward: [(0, '25.692')] +[2024-09-01 16:14:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4399104. Throughput: 0: 228.6. Samples: 96732. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:14:23,182][00194] Avg episode reward: [(0, '25.448')] +[2024-09-01 16:14:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4403200. Throughput: 0: 236.1. Samples: 98556. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:14:28,181][00194] Avg episode reward: [(0, '24.703')] +[2024-09-01 16:14:33,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4407296. Throughput: 0: 229.3. Samples: 98858. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:14:33,184][00194] Avg episode reward: [(0, '24.815')] +[2024-09-01 16:14:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4411392. Throughput: 0: 223.9. Samples: 100224. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:14:38,179][00194] Avg episode reward: [(0, '24.032')] +[2024-09-01 16:14:43,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4415488. Throughput: 0: 235.2. Samples: 101864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:14:43,185][00194] Avg episode reward: [(0, '23.589')] +[2024-09-01 16:14:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4419584. Throughput: 0: 230.4. Samples: 102444. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:14:48,180][00194] Avg episode reward: [(0, '23.996')] +[2024-09-01 16:14:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4423680. Throughput: 0: 222.4. Samples: 103622. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:14:53,185][00194] Avg episode reward: [(0, '23.741')] +[2024-09-01 16:14:53,813][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001081_4427776.pth... +[2024-09-01 16:14:53,817][26015] Updated weights for policy 0, policy_version 1081 (0.2107) +[2024-09-01 16:14:53,927][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001028_4210688.pth +[2024-09-01 16:14:56,082][26002] Signal inference workers to stop experience collection... (100 times) +[2024-09-01 16:14:56,152][26015] InferenceWorker_p0-w0: stopping experience collection (100 times) +[2024-09-01 16:14:57,565][26002] Signal inference workers to resume experience collection... (100 times) +[2024-09-01 16:14:57,566][26015] InferenceWorker_p0-w0: resuming experience collection (100 times) +[2024-09-01 16:14:58,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4431872. Throughput: 0: 231.9. Samples: 105108. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:14:58,182][00194] Avg episode reward: [(0, '24.017')] +[2024-09-01 16:15:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4435968. Throughput: 0: 233.4. Samples: 105874. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:15:03,180][00194] Avg episode reward: [(0, '23.470')] +[2024-09-01 16:15:08,185][00194] Fps is (10 sec: 818.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4440064. Throughput: 0: 229.3. Samples: 107050. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:08,198][00194] Avg episode reward: [(0, '22.411')] +[2024-09-01 16:15:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4444160. Throughput: 0: 223.7. Samples: 108622. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:13,179][00194] Avg episode reward: [(0, '22.251')] +[2024-09-01 16:15:18,177][00194] Fps is (10 sec: 819.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4448256. Throughput: 0: 232.1. Samples: 109304. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:18,180][00194] Avg episode reward: [(0, '22.522')] +[2024-09-01 16:15:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4452352. Throughput: 0: 233.8. Samples: 110744. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:23,180][00194] Avg episode reward: [(0, '22.716')] +[2024-09-01 16:15:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4456448. Throughput: 0: 224.8. Samples: 111978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:28,179][00194] Avg episode reward: [(0, '22.022')] +[2024-09-01 16:15:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 4464640. Throughput: 0: 226.5. Samples: 112638. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:33,183][00194] Avg episode reward: [(0, '22.062')] +[2024-09-01 16:15:37,299][26015] Updated weights for policy 0, policy_version 1091 (0.1942) +[2024-09-01 16:15:38,180][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4468736. Throughput: 0: 235.2. Samples: 114206. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:38,183][00194] Avg episode reward: [(0, '21.303')] +[2024-09-01 16:15:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4472832. Throughput: 0: 225.9. Samples: 115272. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:43,180][00194] Avg episode reward: [(0, '21.581')] +[2024-09-01 16:15:48,177][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4476928. Throughput: 0: 224.9. Samples: 115994. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:48,179][00194] Avg episode reward: [(0, '22.360')] +[2024-09-01 16:15:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4481024. Throughput: 0: 237.6. Samples: 117742. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:53,181][00194] Avg episode reward: [(0, '21.657')] +[2024-09-01 16:15:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4485120. Throughput: 0: 236.6. Samples: 119268. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:15:58,180][00194] Avg episode reward: [(0, '21.595')] +[2024-09-01 16:16:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4489216. Throughput: 0: 229.3. Samples: 119624. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:16:03,183][00194] Avg episode reward: [(0, '22.423')] +[2024-09-01 16:16:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 4493312. Throughput: 0: 234.3. Samples: 121288. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:16:08,189][00194] Avg episode reward: [(0, '22.373')] +[2024-09-01 16:16:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4497408. Throughput: 0: 221.8. Samples: 121960. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:16:13,181][00194] Avg episode reward: [(0, '22.370')] +[2024-09-01 16:16:18,180][00194] Fps is (10 sec: 409.5, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4497408. Throughput: 0: 214.6. Samples: 122294. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:16:18,186][00194] Avg episode reward: [(0, '22.165')] +[2024-09-01 16:16:23,177][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4501504. Throughput: 0: 196.3. Samples: 123040. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:16:23,183][00194] Avg episode reward: [(0, '22.188')] +[2024-09-01 16:16:28,177][00194] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4505600. Throughput: 0: 202.1. Samples: 124366. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:16:28,182][00194] Avg episode reward: [(0, '22.167')] +[2024-09-01 16:16:28,861][26015] Updated weights for policy 0, policy_version 1101 (0.2124) +[2024-09-01 16:16:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 4513792. Throughput: 0: 207.0. Samples: 125310. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:16:33,180][00194] Avg episode reward: [(0, '22.660')] +[2024-09-01 16:16:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 4517888. Throughput: 0: 193.6. Samples: 126456. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:16:38,184][00194] Avg episode reward: [(0, '22.608')] +[2024-09-01 16:16:43,177][00194] Fps is (10 sec: 409.6, 60 sec: 750.9, 300 sec: 888.6). Total num frames: 4517888. Throughput: 0: 183.9. Samples: 127542. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:16:43,183][00194] Avg episode reward: [(0, '21.767')] +[2024-09-01 16:16:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 4526080. Throughput: 0: 198.0. Samples: 128536. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:16:48,184][00194] Avg episode reward: [(0, '21.312')] +[2024-09-01 16:16:51,127][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001106_4530176.pth... +[2024-09-01 16:16:51,247][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001054_4317184.pth +[2024-09-01 16:16:53,177][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 4530176. Throughput: 0: 193.5. Samples: 129996. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:16:53,190][00194] Avg episode reward: [(0, '21.431')] +[2024-09-01 16:16:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4534272. Throughput: 0: 199.9. Samples: 130954. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:16:58,182][00194] Avg episode reward: [(0, '21.104')] +[2024-09-01 16:17:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4538368. Throughput: 0: 207.7. Samples: 131642. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:17:03,179][00194] Avg episode reward: [(0, '20.777')] +[2024-09-01 16:17:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4542464. Throughput: 0: 229.0. Samples: 133346. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:17:08,180][00194] Avg episode reward: [(0, '21.631')] +[2024-09-01 16:17:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4546560. Throughput: 0: 224.0. Samples: 134448. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:17:13,186][00194] Avg episode reward: [(0, '21.594')] +[2024-09-01 16:17:14,987][26015] Updated weights for policy 0, policy_version 1111 (0.1004) +[2024-09-01 16:17:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4550656. Throughput: 0: 213.6. Samples: 134920. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:17:18,185][00194] Avg episode reward: [(0, '21.538')] +[2024-09-01 16:17:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4554752. Throughput: 0: 224.7. Samples: 136568. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:17:23,185][00194] Avg episode reward: [(0, '22.171')] +[2024-09-01 16:17:28,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4562944. Throughput: 0: 231.0. Samples: 137936. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:17:28,179][00194] Avg episode reward: [(0, '22.637')] +[2024-09-01 16:17:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 4562944. Throughput: 0: 225.6. Samples: 138686. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:17:33,185][00194] Avg episode reward: [(0, '22.898')] +[2024-09-01 16:17:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4571136. Throughput: 0: 216.4. Samples: 139736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:17:38,180][00194] Avg episode reward: [(0, '23.147')] +[2024-09-01 16:17:43,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4575232. Throughput: 0: 225.0. Samples: 141078. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:17:43,180][00194] Avg episode reward: [(0, '23.221')] +[2024-09-01 16:17:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4579328. Throughput: 0: 229.0. Samples: 141946. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:17:48,184][00194] Avg episode reward: [(0, '23.405')] +[2024-09-01 16:17:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4583424. Throughput: 0: 215.0. Samples: 143022. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:17:53,182][00194] Avg episode reward: [(0, '23.405')] +[2024-09-01 16:17:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4587520. Throughput: 0: 226.2. Samples: 144626. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:17:58,186][00194] Avg episode reward: [(0, '23.392')] +[2024-09-01 16:18:00,196][26015] Updated weights for policy 0, policy_version 1121 (0.0537) +[2024-09-01 16:18:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4591616. Throughput: 0: 231.2. Samples: 145324. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:18:03,186][00194] Avg episode reward: [(0, '23.385')] +[2024-09-01 16:18:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4595712. Throughput: 0: 225.6. Samples: 146718. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:18:08,180][00194] Avg episode reward: [(0, '23.552')] +[2024-09-01 16:18:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4599808. Throughput: 0: 218.5. Samples: 147768. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:18:13,182][00194] Avg episode reward: [(0, '23.662')] +[2024-09-01 16:18:18,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4608000. Throughput: 0: 222.4. Samples: 148692. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:18:18,189][00194] Avg episode reward: [(0, '23.160')] +[2024-09-01 16:18:23,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4612096. Throughput: 0: 229.2. Samples: 150052. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:18:23,180][00194] Avg episode reward: [(0, '23.275')] +[2024-09-01 16:18:28,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4616192. Throughput: 0: 223.0. Samples: 151112. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:18:28,184][00194] Avg episode reward: [(0, '23.210')] +[2024-09-01 16:18:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4620288. Throughput: 0: 223.3. Samples: 151996. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:18:33,180][00194] Avg episode reward: [(0, '23.776')] +[2024-09-01 16:18:38,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4624384. Throughput: 0: 226.7. Samples: 153224. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:18:38,179][00194] Avg episode reward: [(0, '23.531')] +[2024-09-01 16:18:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4628480. Throughput: 0: 226.6. Samples: 154824. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:18:43,185][00194] Avg episode reward: [(0, '24.070')] +[2024-09-01 16:18:45,617][26015] Updated weights for policy 0, policy_version 1131 (0.1686) +[2024-09-01 16:18:48,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4632576. Throughput: 0: 218.0. Samples: 155136. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:18:48,182][00194] Avg episode reward: [(0, '25.152')] +[2024-09-01 16:18:49,159][26002] Signal inference workers to stop experience collection... (150 times) +[2024-09-01 16:18:49,251][26015] InferenceWorker_p0-w0: stopping experience collection (150 times) +[2024-09-01 16:18:50,348][26002] Signal inference workers to resume experience collection... (150 times) +[2024-09-01 16:18:50,349][26015] InferenceWorker_p0-w0: resuming experience collection (150 times) +[2024-09-01 16:18:50,362][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001132_4636672.pth... +[2024-09-01 16:18:50,482][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001081_4427776.pth +[2024-09-01 16:18:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4636672. Throughput: 0: 222.9. Samples: 156748. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:18:53,179][00194] Avg episode reward: [(0, '25.359')] +[2024-09-01 16:18:58,177][00194] Fps is (10 sec: 1229.0, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4644864. Throughput: 0: 229.2. Samples: 158082. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:18:58,185][00194] Avg episode reward: [(0, '25.478')] +[2024-09-01 16:19:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4644864. Throughput: 0: 225.7. Samples: 158848. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:19:03,185][00194] Avg episode reward: [(0, '25.145')] +[2024-09-01 16:19:08,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4648960. Throughput: 0: 221.0. Samples: 159998. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:19:08,179][00194] Avg episode reward: [(0, '24.893')] +[2024-09-01 16:19:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4657152. Throughput: 0: 229.8. Samples: 161454. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:13,180][00194] Avg episode reward: [(0, '24.973')] +[2024-09-01 16:19:18,180][00194] Fps is (10 sec: 1228.5, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4661248. Throughput: 0: 227.1. Samples: 162216. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:18,190][00194] Avg episode reward: [(0, '24.829')] +[2024-09-01 16:19:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4665344. Throughput: 0: 223.3. Samples: 163272. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:23,185][00194] Avg episode reward: [(0, '23.987')] +[2024-09-01 16:19:28,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4669440. Throughput: 0: 223.1. Samples: 164862. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:28,180][00194] Avg episode reward: [(0, '23.948')] +[2024-09-01 16:19:30,568][26015] Updated weights for policy 0, policy_version 1141 (0.0525) +[2024-09-01 16:19:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4673536. Throughput: 0: 230.9. Samples: 165524. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:33,188][00194] Avg episode reward: [(0, '23.924')] +[2024-09-01 16:19:38,180][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 4677632. Throughput: 0: 224.2. Samples: 166836. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:38,185][00194] Avg episode reward: [(0, '23.835')] +[2024-09-01 16:19:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4681728. Throughput: 0: 222.2. Samples: 168080. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:43,180][00194] Avg episode reward: [(0, '23.820')] +[2024-09-01 16:19:48,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4685824. Throughput: 0: 223.5. Samples: 168904. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:48,180][00194] Avg episode reward: [(0, '25.249')] +[2024-09-01 16:19:53,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4694016. Throughput: 0: 230.3. Samples: 170362. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:53,183][00194] Avg episode reward: [(0, '25.912')] +[2024-09-01 16:19:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 4694016. Throughput: 0: 220.9. Samples: 171394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:19:58,182][00194] Avg episode reward: [(0, '25.991')] +[2024-09-01 16:20:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4702208. Throughput: 0: 219.8. Samples: 172108. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:03,186][00194] Avg episode reward: [(0, '26.219')] +[2024-09-01 16:20:08,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4706304. Throughput: 0: 226.5. Samples: 173464. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:08,180][00194] Avg episode reward: [(0, '25.631')] +[2024-09-01 16:20:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4710400. Throughput: 0: 227.4. Samples: 175094. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:13,180][00194] Avg episode reward: [(0, '26.296')] +[2024-09-01 16:20:16,548][26015] Updated weights for policy 0, policy_version 1151 (0.1530) +[2024-09-01 16:20:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4714496. Throughput: 0: 222.3. Samples: 175526. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:18,183][00194] Avg episode reward: [(0, '26.681')] +[2024-09-01 16:20:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4718592. Throughput: 0: 224.0. Samples: 176914. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:23,181][00194] Avg episode reward: [(0, '26.178')] +[2024-09-01 16:20:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4722688. Throughput: 0: 230.8. Samples: 178464. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:28,180][00194] Avg episode reward: [(0, '26.088')] +[2024-09-01 16:20:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4726784. Throughput: 0: 228.2. Samples: 179174. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:33,180][00194] Avg episode reward: [(0, '25.861')] +[2024-09-01 16:20:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 4730880. Throughput: 0: 217.6. Samples: 180152. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:38,180][00194] Avg episode reward: [(0, '26.192')] +[2024-09-01 16:20:43,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4739072. Throughput: 0: 231.6. Samples: 181818. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:20:43,179][00194] Avg episode reward: [(0, '27.073')] +[2024-09-01 16:20:46,499][26002] Saving new best policy, reward=27.073! +[2024-09-01 16:20:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4743168. Throughput: 0: 235.6. Samples: 182710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:48,184][00194] Avg episode reward: [(0, '26.905')] +[2024-09-01 16:20:52,211][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001159_4747264.pth... +[2024-09-01 16:20:52,288][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001106_4530176.pth +[2024-09-01 16:20:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4747264. Throughput: 0: 226.1. Samples: 183638. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:53,188][00194] Avg episode reward: [(0, '26.621')] +[2024-09-01 16:20:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 4751360. Throughput: 0: 222.8. Samples: 185118. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:20:58,185][00194] Avg episode reward: [(0, '26.404')] +[2024-09-01 16:21:00,777][26015] Updated weights for policy 0, policy_version 1161 (0.0669) +[2024-09-01 16:21:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4755456. Throughput: 0: 228.1. Samples: 185790. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:03,198][00194] Avg episode reward: [(0, '26.060')] +[2024-09-01 16:21:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4759552. Throughput: 0: 228.3. Samples: 187186. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:21:08,184][00194] Avg episode reward: [(0, '26.880')] +[2024-09-01 16:21:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4763648. Throughput: 0: 220.7. Samples: 188396. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:21:13,181][00194] Avg episode reward: [(0, '27.494')] +[2024-09-01 16:21:15,305][26002] Saving new best policy, reward=27.494! +[2024-09-01 16:21:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4767744. Throughput: 0: 222.0. Samples: 189162. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:21:18,189][00194] Avg episode reward: [(0, '27.752')] +[2024-09-01 16:21:23,044][26002] Saving new best policy, reward=27.752! +[2024-09-01 16:21:23,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4775936. Throughput: 0: 238.4. Samples: 190880. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:23,180][00194] Avg episode reward: [(0, '28.043')] +[2024-09-01 16:21:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4775936. Throughput: 0: 223.1. Samples: 191856. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:28,180][00194] Avg episode reward: [(0, '28.605')] +[2024-09-01 16:21:28,766][26002] Saving new best policy, reward=28.043! +[2024-09-01 16:21:33,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4780032. Throughput: 0: 215.9. Samples: 192426. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:33,185][00194] Avg episode reward: [(0, '28.438')] +[2024-09-01 16:21:33,496][26002] Saving new best policy, reward=28.605! +[2024-09-01 16:21:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4788224. Throughput: 0: 227.5. Samples: 193876. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:38,183][00194] Avg episode reward: [(0, '28.370')] +[2024-09-01 16:21:43,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4792320. Throughput: 0: 218.1. Samples: 194932. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:43,184][00194] Avg episode reward: [(0, '28.777')] +[2024-09-01 16:21:46,828][26002] Saving new best policy, reward=28.777! +[2024-09-01 16:21:46,833][26015] Updated weights for policy 0, policy_version 1171 (0.0621) +[2024-09-01 16:21:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4796416. Throughput: 0: 225.7. Samples: 195948. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:48,180][00194] Avg episode reward: [(0, '28.218')] +[2024-09-01 16:21:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4800512. Throughput: 0: 221.9. Samples: 197170. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:53,184][00194] Avg episode reward: [(0, '27.670')] +[2024-09-01 16:21:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4804608. Throughput: 0: 237.2. Samples: 199070. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:21:58,179][00194] Avg episode reward: [(0, '27.562')] +[2024-09-01 16:22:03,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4808704. Throughput: 0: 225.7. Samples: 199320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:22:03,182][00194] Avg episode reward: [(0, '27.447')] +[2024-09-01 16:22:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4812800. Throughput: 0: 214.6. Samples: 200536. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:22:08,179][00194] Avg episode reward: [(0, '27.085')] +[2024-09-01 16:22:13,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4816896. Throughput: 0: 229.4. Samples: 202180. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:22:13,186][00194] Avg episode reward: [(0, '27.707')] +[2024-09-01 16:22:18,180][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4825088. Throughput: 0: 235.8. Samples: 203036. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:22:18,183][00194] Avg episode reward: [(0, '27.036')] +[2024-09-01 16:22:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 4825088. Throughput: 0: 232.8. Samples: 204354. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:22:23,180][00194] Avg episode reward: [(0, '27.002')] +[2024-09-01 16:22:28,177][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4833280. Throughput: 0: 233.3. Samples: 205430. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:22:28,186][00194] Avg episode reward: [(0, '27.165')] +[2024-09-01 16:22:31,927][26015] Updated weights for policy 0, policy_version 1181 (0.1670) +[2024-09-01 16:22:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4837376. Throughput: 0: 233.2. Samples: 206442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:22:33,180][00194] Avg episode reward: [(0, '27.113')] +[2024-09-01 16:22:34,243][26002] Signal inference workers to stop experience collection... (200 times) +[2024-09-01 16:22:34,308][26015] InferenceWorker_p0-w0: stopping experience collection (200 times) +[2024-09-01 16:22:35,711][26002] Signal inference workers to resume experience collection... (200 times) +[2024-09-01 16:22:35,713][26015] InferenceWorker_p0-w0: resuming experience collection (200 times) +[2024-09-01 16:22:38,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4841472. Throughput: 0: 232.7. Samples: 207642. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:22:38,190][00194] Avg episode reward: [(0, '26.311')] +[2024-09-01 16:22:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4845568. Throughput: 0: 217.6. Samples: 208860. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:22:43,180][00194] Avg episode reward: [(0, '25.657')] +[2024-09-01 16:22:48,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4849664. Throughput: 0: 225.9. Samples: 209486. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:22:48,181][00194] Avg episode reward: [(0, '25.622')] +[2024-09-01 16:22:50,066][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001185_4853760.pth... +[2024-09-01 16:22:50,182][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001132_4636672.pth +[2024-09-01 16:22:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4853760. Throughput: 0: 240.2. Samples: 211344. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:22:53,185][00194] Avg episode reward: [(0, '25.926')] +[2024-09-01 16:22:58,180][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4857856. Throughput: 0: 226.4. Samples: 212368. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:22:58,188][00194] Avg episode reward: [(0, '25.848')] +[2024-09-01 16:23:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4861952. Throughput: 0: 219.8. Samples: 212928. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:23:03,180][00194] Avg episode reward: [(0, '25.533')] +[2024-09-01 16:23:08,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4866048. Throughput: 0: 228.4. Samples: 214634. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:23:08,184][00194] Avg episode reward: [(0, '24.453')] +[2024-09-01 16:23:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4874240. Throughput: 0: 234.2. Samples: 215968. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:23:13,181][00194] Avg episode reward: [(0, '23.632')] +[2024-09-01 16:23:17,650][26015] Updated weights for policy 0, policy_version 1191 (0.1826) +[2024-09-01 16:23:18,177][00194] Fps is (10 sec: 1228.9, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4878336. Throughput: 0: 226.5. Samples: 216634. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:23:18,184][00194] Avg episode reward: [(0, '23.473')] +[2024-09-01 16:23:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4882432. Throughput: 0: 221.3. Samples: 217600. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:23:23,180][00194] Avg episode reward: [(0, '24.007')] +[2024-09-01 16:23:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4886528. Throughput: 0: 235.7. Samples: 219466. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:23:28,180][00194] Avg episode reward: [(0, '24.305')] +[2024-09-01 16:23:33,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4890624. Throughput: 0: 229.4. Samples: 219810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:23:33,182][00194] Avg episode reward: [(0, '24.018')] +[2024-09-01 16:23:38,180][00194] Fps is (10 sec: 818.9, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4894720. Throughput: 0: 215.8. Samples: 221054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:23:38,186][00194] Avg episode reward: [(0, '24.131')] +[2024-09-01 16:23:43,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4898816. Throughput: 0: 229.1. Samples: 222678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:23:43,179][00194] Avg episode reward: [(0, '23.711')] +[2024-09-01 16:23:48,177][00194] Fps is (10 sec: 1229.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4907008. Throughput: 0: 232.4. Samples: 223386. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:23:48,180][00194] Avg episode reward: [(0, '24.096')] +[2024-09-01 16:23:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 4907008. Throughput: 0: 224.2. Samples: 224722. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:23:53,180][00194] Avg episode reward: [(0, '24.100')] +[2024-09-01 16:23:58,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4911104. Throughput: 0: 217.7. Samples: 225764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:23:58,185][00194] Avg episode reward: [(0, '24.015')] +[2024-09-01 16:24:02,535][26015] Updated weights for policy 0, policy_version 1201 (0.2548) +[2024-09-01 16:24:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4919296. Throughput: 0: 224.9. Samples: 226756. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:03,180][00194] Avg episode reward: [(0, '24.153')] +[2024-09-01 16:24:08,179][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4923392. Throughput: 0: 235.8. Samples: 228210. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:08,186][00194] Avg episode reward: [(0, '24.533')] +[2024-09-01 16:24:13,182][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 4927488. Throughput: 0: 216.9. Samples: 229228. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:13,190][00194] Avg episode reward: [(0, '24.135')] +[2024-09-01 16:24:18,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4931584. Throughput: 0: 223.7. Samples: 229878. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:18,180][00194] Avg episode reward: [(0, '24.265')] +[2024-09-01 16:24:23,177][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4935680. Throughput: 0: 237.3. Samples: 231730. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:23,180][00194] Avg episode reward: [(0, '25.074')] +[2024-09-01 16:24:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4939776. Throughput: 0: 227.8. Samples: 232930. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:28,180][00194] Avg episode reward: [(0, '25.369')] +[2024-09-01 16:24:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4943872. Throughput: 0: 220.6. Samples: 233312. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:33,186][00194] Avg episode reward: [(0, '25.850')] +[2024-09-01 16:24:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 4952064. Throughput: 0: 227.4. Samples: 234956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:38,182][00194] Avg episode reward: [(0, '25.851')] +[2024-09-01 16:24:43,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4956160. Throughput: 0: 235.2. Samples: 236346. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:43,184][00194] Avg episode reward: [(0, '26.430')] +[2024-09-01 16:24:47,505][26015] Updated weights for policy 0, policy_version 1211 (0.0542) +[2024-09-01 16:24:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4960256. Throughput: 0: 228.0. Samples: 237016. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:24:48,180][00194] Avg episode reward: [(0, '26.794')] +[2024-09-01 16:24:52,208][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001212_4964352.pth... +[2024-09-01 16:24:52,323][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001159_4747264.pth +[2024-09-01 16:24:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4964352. Throughput: 0: 217.7. Samples: 238006. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:24:53,180][00194] Avg episode reward: [(0, '26.405')] +[2024-09-01 16:24:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 4968448. Throughput: 0: 232.1. Samples: 239670. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:24:58,180][00194] Avg episode reward: [(0, '25.465')] +[2024-09-01 16:25:03,178][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4972544. Throughput: 0: 232.1. Samples: 240322. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:03,184][00194] Avg episode reward: [(0, '25.199')] +[2024-09-01 16:25:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4976640. Throughput: 0: 214.8. Samples: 241394. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:08,186][00194] Avg episode reward: [(0, '25.529')] +[2024-09-01 16:25:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4980736. Throughput: 0: 226.1. Samples: 243106. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:13,179][00194] Avg episode reward: [(0, '25.556')] +[2024-09-01 16:25:18,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 4988928. Throughput: 0: 236.1. Samples: 243938. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:18,180][00194] Avg episode reward: [(0, '26.311')] +[2024-09-01 16:25:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4988928. Throughput: 0: 224.8. Samples: 245072. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:23,180][00194] Avg episode reward: [(0, '25.548')] +[2024-09-01 16:25:28,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 4993024. Throughput: 0: 219.0. Samples: 246200. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:28,179][00194] Avg episode reward: [(0, '25.154')] +[2024-09-01 16:25:32,397][26015] Updated weights for policy 0, policy_version 1221 (0.1037) +[2024-09-01 16:25:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5001216. Throughput: 0: 223.4. Samples: 247070. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:33,180][00194] Avg episode reward: [(0, '25.424')] +[2024-09-01 16:25:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5005312. Throughput: 0: 228.9. Samples: 248306. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:38,181][00194] Avg episode reward: [(0, '24.996')] +[2024-09-01 16:25:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5009408. Throughput: 0: 219.6. Samples: 249550. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:43,182][00194] Avg episode reward: [(0, '25.339')] +[2024-09-01 16:25:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5013504. Throughput: 0: 220.8. Samples: 250260. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:48,185][00194] Avg episode reward: [(0, '25.125')] +[2024-09-01 16:25:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5017600. Throughput: 0: 237.2. Samples: 252066. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:53,180][00194] Avg episode reward: [(0, '25.237')] +[2024-09-01 16:25:58,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5021696. Throughput: 0: 227.4. Samples: 253340. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:25:58,180][00194] Avg episode reward: [(0, '25.443')] +[2024-09-01 16:26:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5025792. Throughput: 0: 216.5. Samples: 253682. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:03,184][00194] Avg episode reward: [(0, '24.973')] +[2024-09-01 16:26:08,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5029888. Throughput: 0: 229.0. Samples: 255378. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:08,185][00194] Avg episode reward: [(0, '25.326')] +[2024-09-01 16:26:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5038080. Throughput: 0: 233.8. Samples: 256720. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:13,184][00194] Avg episode reward: [(0, '25.131')] +[2024-09-01 16:26:17,755][26015] Updated weights for policy 0, policy_version 1231 (0.0622) +[2024-09-01 16:26:18,178][00194] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5042176. Throughput: 0: 230.3. Samples: 257432. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:18,182][00194] Avg episode reward: [(0, '24.910')] +[2024-09-01 16:26:21,209][26002] Signal inference workers to stop experience collection... (250 times) +[2024-09-01 16:26:21,250][26015] InferenceWorker_p0-w0: stopping experience collection (250 times) +[2024-09-01 16:26:22,397][26002] Signal inference workers to resume experience collection... (250 times) +[2024-09-01 16:26:22,398][26015] InferenceWorker_p0-w0: resuming experience collection (250 times) +[2024-09-01 16:26:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5046272. Throughput: 0: 226.0. Samples: 258476. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:23,185][00194] Avg episode reward: [(0, '24.996')] +[2024-09-01 16:26:28,177][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5050368. Throughput: 0: 238.2. Samples: 260268. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:28,179][00194] Avg episode reward: [(0, '25.330')] +[2024-09-01 16:26:33,181][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5054464. Throughput: 0: 234.2. Samples: 260800. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:33,188][00194] Avg episode reward: [(0, '24.743')] +[2024-09-01 16:26:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5058560. Throughput: 0: 217.7. Samples: 261862. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:38,180][00194] Avg episode reward: [(0, '24.999')] +[2024-09-01 16:26:43,177][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5062656. Throughput: 0: 226.2. Samples: 263518. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:43,181][00194] Avg episode reward: [(0, '24.556')] +[2024-09-01 16:26:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5070848. Throughput: 0: 233.8. Samples: 264204. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:48,179][00194] Avg episode reward: [(0, '24.914')] +[2024-09-01 16:26:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5070848. Throughput: 0: 227.9. Samples: 265632. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:53,180][00194] Avg episode reward: [(0, '24.642')] +[2024-09-01 16:26:53,769][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001239_5074944.pth... +[2024-09-01 16:26:53,847][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001185_4853760.pth +[2024-09-01 16:26:58,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5074944. Throughput: 0: 220.5. Samples: 266642. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:26:58,185][00194] Avg episode reward: [(0, '25.300')] +[2024-09-01 16:27:02,466][26015] Updated weights for policy 0, policy_version 1241 (0.1041) +[2024-09-01 16:27:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5083136. Throughput: 0: 228.3. Samples: 267704. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:03,179][00194] Avg episode reward: [(0, '25.853')] +[2024-09-01 16:27:08,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5087232. Throughput: 0: 228.9. Samples: 268778. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:08,184][00194] Avg episode reward: [(0, '26.317')] +[2024-09-01 16:27:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5091328. Throughput: 0: 215.9. Samples: 269982. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:13,180][00194] Avg episode reward: [(0, '26.383')] +[2024-09-01 16:27:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5095424. Throughput: 0: 220.7. Samples: 270730. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:18,179][00194] Avg episode reward: [(0, '26.575')] +[2024-09-01 16:27:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5099520. Throughput: 0: 230.7. Samples: 272244. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:23,179][00194] Avg episode reward: [(0, '25.886')] +[2024-09-01 16:27:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5103616. Throughput: 0: 211.2. Samples: 273020. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:28,181][00194] Avg episode reward: [(0, '26.109')] +[2024-09-01 16:27:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5107712. Throughput: 0: 219.8. Samples: 274096. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:33,182][00194] Avg episode reward: [(0, '26.587')] +[2024-09-01 16:27:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5111808. Throughput: 0: 225.7. Samples: 275788. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:27:38,179][00194] Avg episode reward: [(0, '25.520')] +[2024-09-01 16:27:43,178][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5120000. Throughput: 0: 218.7. Samples: 276484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:27:43,182][00194] Avg episode reward: [(0, '26.484')] +[2024-09-01 16:27:48,000][26015] Updated weights for policy 0, policy_version 1251 (0.1605) +[2024-09-01 16:27:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5124096. Throughput: 0: 225.5. Samples: 277850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:27:48,184][00194] Avg episode reward: [(0, '26.844')] +[2024-09-01 16:27:53,177][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5128192. Throughput: 0: 226.1. Samples: 278952. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:27:53,180][00194] Avg episode reward: [(0, '27.357')] +[2024-09-01 16:27:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5132288. Throughput: 0: 235.6. Samples: 280584. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:27:58,180][00194] Avg episode reward: [(0, '27.600')] +[2024-09-01 16:28:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5136384. Throughput: 0: 229.6. Samples: 281062. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:28:03,179][00194] Avg episode reward: [(0, '27.089')] +[2024-09-01 16:28:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5140480. Throughput: 0: 223.0. Samples: 282278. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:28:08,185][00194] Avg episode reward: [(0, '26.846')] +[2024-09-01 16:28:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5144576. Throughput: 0: 243.2. Samples: 283962. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:28:13,179][00194] Avg episode reward: [(0, '26.327')] +[2024-09-01 16:28:18,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5148672. Throughput: 0: 235.3. Samples: 284686. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:28:18,187][00194] Avg episode reward: [(0, '26.687')] +[2024-09-01 16:28:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5152768. Throughput: 0: 225.2. Samples: 285922. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:28:23,183][00194] Avg episode reward: [(0, '26.512')] +[2024-09-01 16:28:28,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5156864. Throughput: 0: 237.1. Samples: 287152. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:28:28,179][00194] Avg episode reward: [(0, '26.852')] +[2024-09-01 16:28:32,670][26015] Updated weights for policy 0, policy_version 1261 (0.1477) +[2024-09-01 16:28:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5165056. Throughput: 0: 222.5. Samples: 287864. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:28:33,180][00194] Avg episode reward: [(0, '26.852')] +[2024-09-01 16:28:38,180][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5169152. Throughput: 0: 227.6. Samples: 289194. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:28:38,187][00194] Avg episode reward: [(0, '26.862')] +[2024-09-01 16:28:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5173248. Throughput: 0: 220.3. Samples: 290496. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:28:43,184][00194] Avg episode reward: [(0, '27.232')] +[2024-09-01 16:28:48,177][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5177344. Throughput: 0: 224.7. Samples: 291172. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:28:48,185][00194] Avg episode reward: [(0, '27.631')] +[2024-09-01 16:28:50,752][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001265_5181440.pth... +[2024-09-01 16:28:50,864][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001212_4964352.pth +[2024-09-01 16:28:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5181440. Throughput: 0: 233.2. Samples: 292770. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:28:53,184][00194] Avg episode reward: [(0, '27.701')] +[2024-09-01 16:28:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5185536. Throughput: 0: 228.1. Samples: 294226. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:28:58,180][00194] Avg episode reward: [(0, '27.442')] +[2024-09-01 16:29:03,178][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5189632. Throughput: 0: 218.4. Samples: 294512. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:29:03,189][00194] Avg episode reward: [(0, '27.442')] +[2024-09-01 16:29:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5193728. Throughput: 0: 223.6. Samples: 295986. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:29:08,179][00194] Avg episode reward: [(0, '28.054')] +[2024-09-01 16:29:13,177][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5201920. Throughput: 0: 227.2. Samples: 297374. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:29:13,184][00194] Avg episode reward: [(0, '27.852')] +[2024-09-01 16:29:17,923][26015] Updated weights for policy 0, policy_version 1271 (0.1976) +[2024-09-01 16:29:18,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5206016. Throughput: 0: 228.8. Samples: 298162. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:29:18,181][00194] Avg episode reward: [(0, '28.579')] +[2024-09-01 16:29:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5210112. Throughput: 0: 227.1. Samples: 299414. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:29:23,179][00194] Avg episode reward: [(0, '28.837')] +[2024-09-01 16:29:26,920][26002] Saving new best policy, reward=28.837! +[2024-09-01 16:29:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5214208. Throughput: 0: 232.1. Samples: 300942. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:29:28,180][00194] Avg episode reward: [(0, '29.127')] +[2024-09-01 16:29:30,759][26002] Saving new best policy, reward=29.127! +[2024-09-01 16:29:33,183][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5218304. Throughput: 0: 231.8. Samples: 301604. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:29:33,186][00194] Avg episode reward: [(0, '29.376')] +[2024-09-01 16:29:36,180][26002] Saving new best policy, reward=29.376! +[2024-09-01 16:29:38,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5222400. Throughput: 0: 218.9. Samples: 302620. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:29:38,183][00194] Avg episode reward: [(0, '29.563')] +[2024-09-01 16:29:41,129][26002] Saving new best policy, reward=29.563! +[2024-09-01 16:29:43,177][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5226496. Throughput: 0: 223.8. Samples: 304298. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:29:43,185][00194] Avg episode reward: [(0, '28.905')] +[2024-09-01 16:29:48,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5230592. Throughput: 0: 232.2. Samples: 304960. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:29:48,185][00194] Avg episode reward: [(0, '29.316')] +[2024-09-01 16:29:53,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5234688. Throughput: 0: 227.9. Samples: 306240. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:29:53,184][00194] Avg episode reward: [(0, '29.042')] +[2024-09-01 16:29:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5238784. Throughput: 0: 223.7. Samples: 307440. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:29:58,180][00194] Avg episode reward: [(0, '28.826')] +[2024-09-01 16:30:03,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5242880. Throughput: 0: 223.8. Samples: 308232. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:30:03,184][00194] Avg episode reward: [(0, '28.151')] +[2024-09-01 16:30:03,523][26015] Updated weights for policy 0, policy_version 1281 (0.0552) +[2024-09-01 16:30:05,793][26002] Signal inference workers to stop experience collection... (300 times) +[2024-09-01 16:30:05,836][26015] InferenceWorker_p0-w0: stopping experience collection (300 times) +[2024-09-01 16:30:07,287][26002] Signal inference workers to resume experience collection... (300 times) +[2024-09-01 16:30:07,289][26015] InferenceWorker_p0-w0: resuming experience collection (300 times) +[2024-09-01 16:30:08,177][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5251072. Throughput: 0: 227.9. Samples: 309668. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:30:08,180][00194] Avg episode reward: [(0, '28.279')] +[2024-09-01 16:30:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5255168. Throughput: 0: 207.8. Samples: 310294. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:30:13,180][00194] Avg episode reward: [(0, '28.296')] +[2024-09-01 16:30:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5259264. Throughput: 0: 217.1. Samples: 311372. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:18,187][00194] Avg episode reward: [(0, '27.743')] +[2024-09-01 16:30:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5263360. Throughput: 0: 231.5. Samples: 313036. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:23,185][00194] Avg episode reward: [(0, '27.846')] +[2024-09-01 16:30:28,180][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5267456. Throughput: 0: 224.3. Samples: 314390. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:28,182][00194] Avg episode reward: [(0, '27.703')] +[2024-09-01 16:30:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5271552. Throughput: 0: 220.7. Samples: 314890. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:33,185][00194] Avg episode reward: [(0, '27.890')] +[2024-09-01 16:30:38,177][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5275648. Throughput: 0: 226.4. Samples: 316426. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:30:38,180][00194] Avg episode reward: [(0, '27.052')] +[2024-09-01 16:30:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5279744. Throughput: 0: 231.5. Samples: 317856. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:30:43,186][00194] Avg episode reward: [(0, '26.846')] +[2024-09-01 16:30:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5283840. Throughput: 0: 232.7. Samples: 318702. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:48,179][00194] Avg episode reward: [(0, '26.407')] +[2024-09-01 16:30:48,655][26015] Updated weights for policy 0, policy_version 1291 (0.2201) +[2024-09-01 16:30:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5287936. Throughput: 0: 224.4. Samples: 319768. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:53,186][00194] Avg episode reward: [(0, '26.771')] +[2024-09-01 16:30:53,484][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001292_5292032.pth... +[2024-09-01 16:30:53,591][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001239_5074944.pth +[2024-09-01 16:30:58,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5296128. Throughput: 0: 241.4. Samples: 321156. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:30:58,180][00194] Avg episode reward: [(0, '26.717')] +[2024-09-01 16:31:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5300224. Throughput: 0: 234.9. Samples: 321944. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:31:03,179][00194] Avg episode reward: [(0, '26.465')] +[2024-09-01 16:31:08,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5304320. Throughput: 0: 224.1. Samples: 323120. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:31:08,182][00194] Avg episode reward: [(0, '26.401')] +[2024-09-01 16:31:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5308416. Throughput: 0: 229.1. Samples: 324698. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:31:13,182][00194] Avg episode reward: [(0, '26.210')] +[2024-09-01 16:31:18,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5312512. Throughput: 0: 232.5. Samples: 325354. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:31:18,185][00194] Avg episode reward: [(0, '25.831')] +[2024-09-01 16:31:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5316608. Throughput: 0: 233.2. Samples: 326918. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:31:23,180][00194] Avg episode reward: [(0, '25.144')] +[2024-09-01 16:31:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5320704. Throughput: 0: 224.7. Samples: 327966. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:31:28,180][00194] Avg episode reward: [(0, '25.316')] +[2024-09-01 16:31:33,076][26015] Updated weights for policy 0, policy_version 1301 (0.0550) +[2024-09-01 16:31:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5328896. Throughput: 0: 220.6. Samples: 328630. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:31:33,179][00194] Avg episode reward: [(0, '25.962')] +[2024-09-01 16:31:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5332992. Throughput: 0: 230.1. Samples: 330122. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:31:38,180][00194] Avg episode reward: [(0, '25.709')] +[2024-09-01 16:31:43,181][00194] Fps is (10 sec: 818.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5337088. Throughput: 0: 221.4. Samples: 331122. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:31:43,191][00194] Avg episode reward: [(0, '25.805')] +[2024-09-01 16:31:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5341184. Throughput: 0: 223.1. Samples: 331982. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:31:48,179][00194] Avg episode reward: [(0, '26.897')] +[2024-09-01 16:31:53,177][00194] Fps is (10 sec: 819.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5345280. Throughput: 0: 233.8. Samples: 333642. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:31:53,186][00194] Avg episode reward: [(0, '27.249')] +[2024-09-01 16:31:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5349376. Throughput: 0: 231.3. Samples: 335106. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:31:58,184][00194] Avg episode reward: [(0, '26.684')] +[2024-09-01 16:32:03,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5353472. Throughput: 0: 224.4. Samples: 335450. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:32:03,189][00194] Avg episode reward: [(0, '26.436')] +[2024-09-01 16:32:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5357568. Throughput: 0: 223.6. Samples: 336978. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:32:08,180][00194] Avg episode reward: [(0, '26.042')] +[2024-09-01 16:32:13,177][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5365760. Throughput: 0: 231.1. Samples: 338364. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:32:13,180][00194] Avg episode reward: [(0, '26.699')] +[2024-09-01 16:32:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5365760. Throughput: 0: 234.7. Samples: 339192. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:32:18,180][00194] Avg episode reward: [(0, '26.955')] +[2024-09-01 16:32:18,587][26015] Updated weights for policy 0, policy_version 1311 (0.1637) +[2024-09-01 16:32:23,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5369856. Throughput: 0: 227.7. Samples: 340368. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:32:23,186][00194] Avg episode reward: [(0, '26.630')] +[2024-09-01 16:32:28,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5378048. Throughput: 0: 236.6. Samples: 341768. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:32:28,183][00194] Avg episode reward: [(0, '26.073')] +[2024-09-01 16:32:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5382144. Throughput: 0: 231.7. Samples: 342408. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:32:33,180][00194] Avg episode reward: [(0, '25.973')] +[2024-09-01 16:32:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5386240. Throughput: 0: 220.3. Samples: 343554. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:32:38,180][00194] Avg episode reward: [(0, '25.922')] +[2024-09-01 16:32:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5390336. Throughput: 0: 221.9. Samples: 345090. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:32:43,180][00194] Avg episode reward: [(0, '26.415')] +[2024-09-01 16:32:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5394432. Throughput: 0: 224.7. Samples: 345560. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:32:48,180][00194] Avg episode reward: [(0, '25.910')] +[2024-09-01 16:32:49,251][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001318_5398528.pth... +[2024-09-01 16:32:49,362][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001265_5181440.pth +[2024-09-01 16:32:53,180][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5398528. Throughput: 0: 227.6. Samples: 347220. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:32:53,185][00194] Avg episode reward: [(0, '25.335')] +[2024-09-01 16:32:58,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5402624. Throughput: 0: 221.5. Samples: 348330. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:32:58,186][00194] Avg episode reward: [(0, '25.732')] +[2024-09-01 16:33:03,177][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5406720. Throughput: 0: 217.0. Samples: 348956. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:33:03,183][00194] Avg episode reward: [(0, '25.498')] +[2024-09-01 16:33:03,571][26015] Updated weights for policy 0, policy_version 1321 (0.1594) +[2024-09-01 16:33:08,178][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5414912. Throughput: 0: 227.3. Samples: 350596. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:33:08,188][00194] Avg episode reward: [(0, '25.190')] +[2024-09-01 16:33:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5419008. Throughput: 0: 219.7. Samples: 351656. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:33:13,183][00194] Avg episode reward: [(0, '25.469')] +[2024-09-01 16:33:18,177][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5423104. Throughput: 0: 223.4. Samples: 352462. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:33:18,187][00194] Avg episode reward: [(0, '26.090')] +[2024-09-01 16:33:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5427200. Throughput: 0: 226.1. Samples: 353730. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:33:23,180][00194] Avg episode reward: [(0, '26.379')] +[2024-09-01 16:33:28,180][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5431296. Throughput: 0: 229.7. Samples: 355428. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:33:28,182][00194] Avg episode reward: [(0, '26.392')] +[2024-09-01 16:33:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5435392. Throughput: 0: 227.3. Samples: 355790. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:33:33,185][00194] Avg episode reward: [(0, '25.307')] +[2024-09-01 16:33:38,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5439488. Throughput: 0: 224.9. Samples: 357338. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:33:38,190][00194] Avg episode reward: [(0, '25.175')] +[2024-09-01 16:33:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5443584. Throughput: 0: 233.1. Samples: 358820. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:33:43,186][00194] Avg episode reward: [(0, '24.816')] +[2024-09-01 16:33:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5447680. Throughput: 0: 236.6. Samples: 359604. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:33:48,180][00194] Avg episode reward: [(0, '24.875')] +[2024-09-01 16:33:48,923][26015] Updated weights for policy 0, policy_version 1331 (0.1040) +[2024-09-01 16:33:52,544][26002] Signal inference workers to stop experience collection... (350 times) +[2024-09-01 16:33:52,592][26015] InferenceWorker_p0-w0: stopping experience collection (350 times) +[2024-09-01 16:33:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5451776. Throughput: 0: 224.4. Samples: 360694. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:33:53,186][00194] Avg episode reward: [(0, '24.882')] +[2024-09-01 16:33:53,817][26002] Signal inference workers to resume experience collection... (350 times) +[2024-09-01 16:33:53,818][26015] InferenceWorker_p0-w0: resuming experience collection (350 times) +[2024-09-01 16:33:58,178][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5459968. Throughput: 0: 233.2. Samples: 362152. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:33:58,184][00194] Avg episode reward: [(0, '25.407')] +[2024-09-01 16:34:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5464064. Throughput: 0: 232.4. Samples: 362920. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:34:03,187][00194] Avg episode reward: [(0, '24.828')] +[2024-09-01 16:34:08,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5468160. Throughput: 0: 227.6. Samples: 363972. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:34:08,182][00194] Avg episode reward: [(0, '24.905')] +[2024-09-01 16:34:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5472256. Throughput: 0: 218.9. Samples: 365276. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:34:13,179][00194] Avg episode reward: [(0, '25.595')] +[2024-09-01 16:34:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5476352. Throughput: 0: 229.0. Samples: 366094. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:34:18,181][00194] Avg episode reward: [(0, '25.096')] +[2024-09-01 16:34:23,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5480448. Throughput: 0: 229.7. Samples: 367674. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:34:23,182][00194] Avg episode reward: [(0, '24.877')] +[2024-09-01 16:34:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5484544. Throughput: 0: 220.9. Samples: 368760. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:34:28,179][00194] Avg episode reward: [(0, '25.541')] +[2024-09-01 16:34:33,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5488640. Throughput: 0: 214.0. Samples: 369236. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0) +[2024-09-01 16:34:33,180][00194] Avg episode reward: [(0, '25.725')] +[2024-09-01 16:34:33,851][26015] Updated weights for policy 0, policy_version 1341 (0.1523) +[2024-09-01 16:34:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5496832. Throughput: 0: 229.5. Samples: 371022. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:34:38,180][00194] Avg episode reward: [(0, '25.720')] +[2024-09-01 16:34:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5496832. Throughput: 0: 221.5. Samples: 372120. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:34:43,179][00194] Avg episode reward: [(0, '25.858')] +[2024-09-01 16:34:48,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5500928. Throughput: 0: 219.4. Samples: 372794. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0) +[2024-09-01 16:34:48,180][00194] Avg episode reward: [(0, '26.018')] +[2024-09-01 16:34:52,017][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001345_5509120.pth... +[2024-09-01 16:34:52,131][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001292_5292032.pth +[2024-09-01 16:34:53,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5509120. Throughput: 0: 227.4. Samples: 374206. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:34:53,180][00194] Avg episode reward: [(0, '26.609')] +[2024-09-01 16:34:58,178][00194] Fps is (10 sec: 1228.7, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 5513216. Throughput: 0: 231.2. Samples: 375680. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:34:58,190][00194] Avg episode reward: [(0, '26.783')] +[2024-09-01 16:35:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5517312. Throughput: 0: 226.6. Samples: 376292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:03,183][00194] Avg episode reward: [(0, '26.419')] +[2024-09-01 16:35:08,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5521408. Throughput: 0: 214.1. Samples: 377310. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:08,186][00194] Avg episode reward: [(0, '26.419')] +[2024-09-01 16:35:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5525504. Throughput: 0: 234.5. Samples: 379312. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:13,183][00194] Avg episode reward: [(0, '27.281')] +[2024-09-01 16:35:18,179][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5529600. Throughput: 0: 232.7. Samples: 379710. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:18,184][00194] Avg episode reward: [(0, '26.924')] +[2024-09-01 16:35:20,389][26015] Updated weights for policy 0, policy_version 1351 (0.0541) +[2024-09-01 16:35:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5533696. Throughput: 0: 215.7. Samples: 380728. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:23,181][00194] Avg episode reward: [(0, '26.941')] +[2024-09-01 16:35:28,177][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5537792. Throughput: 0: 226.6. Samples: 382318. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:28,186][00194] Avg episode reward: [(0, '26.888')] +[2024-09-01 16:35:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5545984. Throughput: 0: 229.3. Samples: 383112. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:33,181][00194] Avg episode reward: [(0, '27.257')] +[2024-09-01 16:35:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 5545984. Throughput: 0: 225.2. Samples: 384342. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:35:38,180][00194] Avg episode reward: [(0, '27.257')] +[2024-09-01 16:35:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5554176. Throughput: 0: 217.3. Samples: 385458. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:35:43,179][00194] Avg episode reward: [(0, '27.668')] +[2024-09-01 16:35:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5558272. Throughput: 0: 224.6. Samples: 386400. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:35:48,180][00194] Avg episode reward: [(0, '27.325')] +[2024-09-01 16:35:53,177][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5558272. Throughput: 0: 225.4. Samples: 387454. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:35:53,185][00194] Avg episode reward: [(0, '27.561')] +[2024-09-01 16:35:58,177][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5562368. Throughput: 0: 181.6. Samples: 387486. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:35:58,180][00194] Avg episode reward: [(0, '27.645')] +[2024-09-01 16:36:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5566464. Throughput: 0: 193.9. Samples: 388434. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:36:03,184][00194] Avg episode reward: [(0, '27.329')] +[2024-09-01 16:36:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5570560. Throughput: 0: 204.4. Samples: 389926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:08,184][00194] Avg episode reward: [(0, '26.799')] +[2024-09-01 16:36:10,656][26015] Updated weights for policy 0, policy_version 1361 (0.3310) +[2024-09-01 16:36:13,178][00194] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5574656. Throughput: 0: 200.3. Samples: 391330. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:13,181][00194] Avg episode reward: [(0, '26.127')] +[2024-09-01 16:36:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5578752. Throughput: 0: 191.1. Samples: 391712. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:18,181][00194] Avg episode reward: [(0, '26.861')] +[2024-09-01 16:36:23,177][00194] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5582848. Throughput: 0: 193.0. Samples: 393028. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:23,179][00194] Avg episode reward: [(0, '26.413')] +[2024-09-01 16:36:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 5586944. Throughput: 0: 204.6. Samples: 394666. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:36:28,185][00194] Avg episode reward: [(0, '26.205')] +[2024-09-01 16:36:33,180][00194] Fps is (10 sec: 818.9, 60 sec: 750.9, 300 sec: 874.7). Total num frames: 5591040. Throughput: 0: 203.0. Samples: 395536. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:36:33,183][00194] Avg episode reward: [(0, '26.133')] +[2024-09-01 16:36:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.8). Total num frames: 5595136. Throughput: 0: 201.6. Samples: 396526. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:38,180][00194] Avg episode reward: [(0, '26.493')] +[2024-09-01 16:36:43,177][00194] Fps is (10 sec: 1229.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5603328. Throughput: 0: 212.4. Samples: 397046. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:43,180][00194] Avg episode reward: [(0, '26.270')] +[2024-09-01 16:36:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 5607424. Throughput: 0: 229.1. Samples: 398742. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:48,184][00194] Avg episode reward: [(0, '26.885')] +[2024-09-01 16:36:52,435][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001370_5611520.pth... +[2024-09-01 16:36:52,584][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001318_5398528.pth +[2024-09-01 16:36:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5611520. Throughput: 0: 219.0. Samples: 399782. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:53,184][00194] Avg episode reward: [(0, '27.118')] +[2024-09-01 16:36:57,312][26015] Updated weights for policy 0, policy_version 1371 (0.2019) +[2024-09-01 16:36:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5615616. Throughput: 0: 217.3. Samples: 401110. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:36:58,179][00194] Avg episode reward: [(0, '27.157')] +[2024-09-01 16:37:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5619712. Throughput: 0: 224.1. Samples: 401796. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:37:03,185][00194] Avg episode reward: [(0, '27.084')] +[2024-09-01 16:37:08,179][00194] Fps is (10 sec: 819.1, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 5623808. Throughput: 0: 224.3. Samples: 403122. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:37:08,182][00194] Avg episode reward: [(0, '26.167')] +[2024-09-01 16:37:13,178][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5627904. Throughput: 0: 213.5. Samples: 404274. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:37:13,182][00194] Avg episode reward: [(0, '26.559')] +[2024-09-01 16:37:18,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5632000. Throughput: 0: 211.1. Samples: 405036. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:37:18,188][00194] Avg episode reward: [(0, '26.834')] +[2024-09-01 16:37:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5636096. Throughput: 0: 227.5. Samples: 406764. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:23,185][00194] Avg episode reward: [(0, '26.512')] +[2024-09-01 16:37:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5640192. Throughput: 0: 240.3. Samples: 407858. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:28,180][00194] Avg episode reward: [(0, '26.180')] +[2024-09-01 16:37:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5644288. Throughput: 0: 214.3. Samples: 408384. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:37:33,182][00194] Avg episode reward: [(0, '26.672')] +[2024-09-01 16:37:38,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5652480. Throughput: 0: 225.7. Samples: 409938. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:38,180][00194] Avg episode reward: [(0, '26.354')] +[2024-09-01 16:37:41,426][26015] Updated weights for policy 0, policy_version 1381 (0.1659) +[2024-09-01 16:37:43,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5656576. Throughput: 0: 216.3. Samples: 410844. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:43,179][00194] Avg episode reward: [(0, '25.707')] +[2024-09-01 16:37:45,197][26002] Signal inference workers to stop experience collection... (400 times) +[2024-09-01 16:37:45,287][26015] InferenceWorker_p0-w0: stopping experience collection (400 times) +[2024-09-01 16:37:46,980][26002] Signal inference workers to resume experience collection... (400 times) +[2024-09-01 16:37:46,982][26015] InferenceWorker_p0-w0: resuming experience collection (400 times) +[2024-09-01 16:37:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5660672. Throughput: 0: 226.3. Samples: 411978. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:48,181][00194] Avg episode reward: [(0, '25.427')] +[2024-09-01 16:37:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5664768. Throughput: 0: 223.8. Samples: 413194. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:53,180][00194] Avg episode reward: [(0, '25.146')] +[2024-09-01 16:37:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5668864. Throughput: 0: 237.0. Samples: 414940. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:37:58,180][00194] Avg episode reward: [(0, '25.685')] +[2024-09-01 16:38:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5672960. Throughput: 0: 232.2. Samples: 415484. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:38:03,180][00194] Avg episode reward: [(0, '26.396')] +[2024-09-01 16:38:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5677056. Throughput: 0: 216.0. Samples: 416486. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:08,180][00194] Avg episode reward: [(0, '25.876')] +[2024-09-01 16:38:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5681152. Throughput: 0: 228.4. Samples: 418138. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:13,184][00194] Avg episode reward: [(0, '26.671')] +[2024-09-01 16:38:18,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5689344. Throughput: 0: 236.4. Samples: 419024. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:18,180][00194] Avg episode reward: [(0, '27.117')] +[2024-09-01 16:38:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5689344. Throughput: 0: 224.5. Samples: 420042. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:23,180][00194] Avg episode reward: [(0, '27.637')] +[2024-09-01 16:38:28,177][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5693440. Throughput: 0: 233.0. Samples: 421328. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0) +[2024-09-01 16:38:28,188][00194] Avg episode reward: [(0, '27.652')] +[2024-09-01 16:38:28,341][26015] Updated weights for policy 0, policy_version 1391 (0.1704) +[2024-09-01 16:38:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5701632. Throughput: 0: 229.3. Samples: 422298. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:33,186][00194] Avg episode reward: [(0, '27.748')] +[2024-09-01 16:38:38,178][00194] Fps is (10 sec: 1228.6, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 5705728. Throughput: 0: 231.5. Samples: 423610. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:38,182][00194] Avg episode reward: [(0, '27.597')] +[2024-09-01 16:38:43,178][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5709824. Throughput: 0: 208.8. Samples: 424336. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:43,183][00194] Avg episode reward: [(0, '27.989')] +[2024-09-01 16:38:48,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5713920. Throughput: 0: 221.0. Samples: 425428. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:48,185][00194] Avg episode reward: [(0, '28.276')] +[2024-09-01 16:38:50,276][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001396_5718016.pth... +[2024-09-01 16:38:50,382][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001345_5509120.pth +[2024-09-01 16:38:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5718016. Throughput: 0: 240.1. Samples: 427292. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:53,183][00194] Avg episode reward: [(0, '27.585')] +[2024-09-01 16:38:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5722112. Throughput: 0: 229.6. Samples: 428470. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 16:38:58,180][00194] Avg episode reward: [(0, '27.747')] +[2024-09-01 16:39:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5726208. Throughput: 0: 217.2. Samples: 428796. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:39:03,182][00194] Avg episode reward: [(0, '27.350')] +[2024-09-01 16:39:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5730304. Throughput: 0: 235.5. Samples: 430640. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 16:39:08,180][00194] Avg episode reward: [(0, '26.992')] +[2024-09-01 16:39:12,822][26015] Updated weights for policy 0, policy_version 1401 (0.1187) +[2024-09-01 16:39:13,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5738496. Throughput: 0: 231.6. Samples: 431750. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:39:13,186][00194] Avg episode reward: [(0, '27.130')] +[2024-09-01 16:39:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 5738496. Throughput: 0: 226.0. Samples: 432468. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:39:18,181][00194] Avg episode reward: [(0, '27.574')] +[2024-09-01 16:39:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5746688. Throughput: 0: 223.6. Samples: 433670. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:39:23,186][00194] Avg episode reward: [(0, '27.670')] +[2024-09-01 16:39:28,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5750784. Throughput: 0: 244.6. Samples: 435342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:39:28,187][00194] Avg episode reward: [(0, '28.205')] +[2024-09-01 16:39:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5754880. Throughput: 0: 232.3. Samples: 435880. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:39:33,186][00194] Avg episode reward: [(0, '28.126')] +[2024-09-01 16:39:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5758976. Throughput: 0: 212.3. Samples: 436844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:39:38,180][00194] Avg episode reward: [(0, '28.321')] +[2024-09-01 16:39:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5763072. Throughput: 0: 222.5. Samples: 438484. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 16:39:43,186][00194] Avg episode reward: [(0, '28.215')] +[2024-09-01 16:39:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5767168. Throughput: 0: 229.5. Samples: 439124. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:39:48,180][00194] Avg episode reward: [(0, '28.771')] +[2024-09-01 16:39:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5771264. Throughput: 0: 221.1. Samples: 440588. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:39:53,184][00194] Avg episode reward: [(0, '29.294')] +[2024-09-01 16:39:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5775360. Throughput: 0: 226.7. Samples: 441952. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:39:58,180][00194] Avg episode reward: [(0, '29.630')] +[2024-09-01 16:39:59,619][26015] Updated weights for policy 0, policy_version 1411 (0.2575) +[2024-09-01 16:40:03,032][26002] Saving new best policy, reward=29.630! +[2024-09-01 16:40:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5783552. Throughput: 0: 224.9. Samples: 442588. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:03,180][00194] Avg episode reward: [(0, '29.775')] +[2024-09-01 16:40:07,874][26002] Saving new best policy, reward=29.775! +[2024-09-01 16:40:08,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5787648. Throughput: 0: 230.4. Samples: 444036. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:08,184][00194] Avg episode reward: [(0, '29.622')] +[2024-09-01 16:40:13,177][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 874.7). Total num frames: 5787648. Throughput: 0: 215.9. Samples: 445056. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:13,182][00194] Avg episode reward: [(0, '29.296')] +[2024-09-01 16:40:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 5795840. Throughput: 0: 225.8. Samples: 446042. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:18,180][00194] Avg episode reward: [(0, '28.875')] +[2024-09-01 16:40:23,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5799936. Throughput: 0: 235.5. Samples: 447442. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:23,179][00194] Avg episode reward: [(0, '29.872')] +[2024-09-01 16:40:25,841][26002] Saving new best policy, reward=29.872! +[2024-09-01 16:40:28,181][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 874.7). Total num frames: 5804032. Throughput: 0: 226.6. Samples: 448684. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:28,184][00194] Avg episode reward: [(0, '29.518')] +[2024-09-01 16:40:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5808128. Throughput: 0: 223.5. Samples: 449180. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:33,179][00194] Avg episode reward: [(0, '29.178')] +[2024-09-01 16:40:38,177][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5812224. Throughput: 0: 228.6. Samples: 450874. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:38,181][00194] Avg episode reward: [(0, '29.242')] +[2024-09-01 16:40:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 874.7). Total num frames: 5816320. Throughput: 0: 228.0. Samples: 452210. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:40:43,180][00194] Avg episode reward: [(0, '29.231')] +[2024-09-01 16:40:44,210][26015] Updated weights for policy 0, policy_version 1421 (0.0544) +[2024-09-01 16:40:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5820416. Throughput: 0: 223.9. Samples: 452662. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:40:48,186][00194] Avg episode reward: [(0, '29.310')] +[2024-09-01 16:40:49,709][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001422_5824512.pth... +[2024-09-01 16:40:49,817][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001370_5611520.pth +[2024-09-01 16:40:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5824512. Throughput: 0: 227.0. Samples: 454250. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:40:53,180][00194] Avg episode reward: [(0, '29.215')] +[2024-09-01 16:40:58,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5832704. Throughput: 0: 229.6. Samples: 455390. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:40:58,187][00194] Avg episode reward: [(0, '29.728')] +[2024-09-01 16:41:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5836800. Throughput: 0: 226.2. Samples: 456220. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:03,183][00194] Avg episode reward: [(0, '29.735')] +[2024-09-01 16:41:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5840896. Throughput: 0: 218.3. Samples: 457266. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:08,188][00194] Avg episode reward: [(0, '29.735')] +[2024-09-01 16:41:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5844992. Throughput: 0: 227.1. Samples: 458904. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:13,180][00194] Avg episode reward: [(0, '29.259')] +[2024-09-01 16:41:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5849088. Throughput: 0: 231.3. Samples: 459588. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:18,180][00194] Avg episode reward: [(0, '30.461')] +[2024-09-01 16:41:20,439][26002] Saving new best policy, reward=30.461! +[2024-09-01 16:41:23,183][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 5853184. Throughput: 0: 220.0. Samples: 460776. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:23,186][00194] Avg episode reward: [(0, '30.677')] +[2024-09-01 16:41:25,831][26002] Saving new best policy, reward=30.677! +[2024-09-01 16:41:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5857280. Throughput: 0: 225.9. Samples: 462376. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:28,180][00194] Avg episode reward: [(0, '31.919')] +[2024-09-01 16:41:30,117][26015] Updated weights for policy 0, policy_version 1431 (0.1548) +[2024-09-01 16:41:32,499][26002] Signal inference workers to stop experience collection... (450 times) +[2024-09-01 16:41:32,562][26015] InferenceWorker_p0-w0: stopping experience collection (450 times) +[2024-09-01 16:41:33,177][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5861376. Throughput: 0: 226.8. Samples: 462868. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:33,185][00194] Avg episode reward: [(0, '32.206')] +[2024-09-01 16:41:33,431][26002] Saving new best policy, reward=31.919! +[2024-09-01 16:41:33,433][26002] Signal inference workers to resume experience collection... (450 times) +[2024-09-01 16:41:33,443][26015] InferenceWorker_p0-w0: resuming experience collection (450 times) +[2024-09-01 16:41:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5865472. Throughput: 0: 227.5. Samples: 464488. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:38,180][00194] Avg episode reward: [(0, '31.630')] +[2024-09-01 16:41:38,667][26002] Saving new best policy, reward=32.206! +[2024-09-01 16:41:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5869568. Throughput: 0: 225.4. Samples: 465532. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:43,190][00194] Avg episode reward: [(0, '31.349')] +[2024-09-01 16:41:48,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5877760. Throughput: 0: 227.9. Samples: 466474. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 16:41:48,180][00194] Avg episode reward: [(0, '30.458')] +[2024-09-01 16:41:53,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5881856. Throughput: 0: 232.0. Samples: 467704. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:41:53,180][00194] Avg episode reward: [(0, '30.334')] +[2024-09-01 16:41:58,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5885952. Throughput: 0: 221.0. Samples: 468848. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:41:58,185][00194] Avg episode reward: [(0, '30.870')] +[2024-09-01 16:42:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5890048. Throughput: 0: 224.7. Samples: 469698. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:03,180][00194] Avg episode reward: [(0, '30.500')] +[2024-09-01 16:42:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5894144. Throughput: 0: 233.7. Samples: 471290. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:08,187][00194] Avg episode reward: [(0, '30.023')] +[2024-09-01 16:42:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5898240. Throughput: 0: 224.9. Samples: 472496. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:42:13,182][00194] Avg episode reward: [(0, '29.890')] +[2024-09-01 16:42:15,935][26015] Updated weights for policy 0, policy_version 1441 (0.2283) +[2024-09-01 16:42:18,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5902336. Throughput: 0: 221.2. Samples: 472820. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:42:18,180][00194] Avg episode reward: [(0, '29.184')] +[2024-09-01 16:42:23,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 5906432. Throughput: 0: 226.5. Samples: 474682. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:42:23,183][00194] Avg episode reward: [(0, '29.022')] +[2024-09-01 16:42:28,182][00194] Fps is (10 sec: 1228.2, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 5914624. Throughput: 0: 234.3. Samples: 476078. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:28,185][00194] Avg episode reward: [(0, '30.580')] +[2024-09-01 16:42:33,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5918720. Throughput: 0: 229.8. Samples: 476814. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:33,179][00194] Avg episode reward: [(0, '30.606')] +[2024-09-01 16:42:38,177][00194] Fps is (10 sec: 819.6, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5922816. Throughput: 0: 226.2. Samples: 477882. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:38,184][00194] Avg episode reward: [(0, '30.535')] +[2024-09-01 16:42:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 5926912. Throughput: 0: 236.6. Samples: 479494. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:43,180][00194] Avg episode reward: [(0, '30.297')] +[2024-09-01 16:42:48,178][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5931008. Throughput: 0: 233.8. Samples: 480220. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:48,186][00194] Avg episode reward: [(0, '29.842')] +[2024-09-01 16:42:50,977][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001449_5935104.pth... +[2024-09-01 16:42:51,095][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001396_5718016.pth +[2024-09-01 16:42:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5935104. Throughput: 0: 221.6. Samples: 481264. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:42:53,180][00194] Avg episode reward: [(0, '29.402')] +[2024-09-01 16:42:58,177][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5939200. Throughput: 0: 229.0. Samples: 482802. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:42:58,180][00194] Avg episode reward: [(0, '29.277')] +[2024-09-01 16:42:59,731][26015] Updated weights for policy 0, policy_version 1451 (0.1034) +[2024-09-01 16:43:03,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5943296. Throughput: 0: 241.0. Samples: 483664. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:43:03,182][00194] Avg episode reward: [(0, '30.124')] +[2024-09-01 16:43:08,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5947392. Throughput: 0: 225.6. Samples: 484836. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) +[2024-09-01 16:43:08,184][00194] Avg episode reward: [(0, '30.448')] +[2024-09-01 16:43:13,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 5951488. Throughput: 0: 222.8. Samples: 486104. Policy #0 lag: (min: 1.0, avg: 1.8, max: 2.0) +[2024-09-01 16:43:13,184][00194] Avg episode reward: [(0, '30.254')] +[2024-09-01 16:43:18,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5959680. Throughput: 0: 226.3. Samples: 486998. Policy #0 lag: (min: 1.0, avg: 1.8, max: 3.0) +[2024-09-01 16:43:18,180][00194] Avg episode reward: [(0, '30.282')] +[2024-09-01 16:43:23,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5963776. Throughput: 0: 234.2. Samples: 488422. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:43:23,181][00194] Avg episode reward: [(0, '30.728')] +[2024-09-01 16:43:28,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5967872. Throughput: 0: 221.6. Samples: 489468. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 16:43:28,186][00194] Avg episode reward: [(0, '29.194')] +[2024-09-01 16:43:33,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5971968. Throughput: 0: 221.4. Samples: 490182. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:43:33,185][00194] Avg episode reward: [(0, '29.182')] +[2024-09-01 16:43:38,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5976064. Throughput: 0: 230.0. Samples: 491614. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:43:38,180][00194] Avg episode reward: [(0, '29.130')] +[2024-09-01 16:43:43,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5980160. Throughput: 0: 229.9. Samples: 493148. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:43:43,182][00194] Avg episode reward: [(0, '28.244')] +[2024-09-01 16:43:44,795][26015] Updated weights for policy 0, policy_version 1461 (0.1507) +[2024-09-01 16:43:48,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5984256. Throughput: 0: 217.7. Samples: 493462. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:43:48,179][00194] Avg episode reward: [(0, '28.026')] +[2024-09-01 16:43:53,177][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 5988352. Throughput: 0: 230.8. Samples: 495220. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 16:43:53,180][00194] Avg episode reward: [(0, '27.804')] +[2024-09-01 16:43:58,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 5996544. Throughput: 0: 233.6. Samples: 496618. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:43:58,186][00194] Avg episode reward: [(0, '29.239')] +[2024-09-01 16:44:03,177][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 6000640. Throughput: 0: 229.3. Samples: 497318. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 16:44:03,184][00194] Avg episode reward: [(0, '28.802')] +[2024-09-01 16:44:07,573][26002] Stopping Batcher_0... +[2024-09-01 16:44:07,574][26002] Loop batcher_evt_loop terminating... +[2024-09-01 16:44:07,585][00194] Component Batcher_0 stopped! +[2024-09-01 16:44:07,630][26015] Weights refcount: 2 0 +[2024-09-01 16:44:07,635][00194] Component InferenceWorker_p0-w0 stopped! +[2024-09-01 16:44:07,638][26015] Stopping InferenceWorker_p0-w0... +[2024-09-01 16:44:07,642][26015] Loop inference_proc0-0_evt_loop terminating... +[2024-09-01 16:44:08,076][26021] Stopping RolloutWorker_w5... +[2024-09-01 16:44:08,086][26021] Loop rollout_proc5_evt_loop terminating... +[2024-09-01 16:44:08,077][00194] Component RolloutWorker_w5 stopped! +[2024-09-01 16:44:08,115][00194] Component RolloutWorker_w3 stopped! +[2024-09-01 16:44:08,115][26019] Stopping RolloutWorker_w3... +[2024-09-01 16:44:08,132][26019] Loop rollout_proc3_evt_loop terminating... +[2024-09-01 16:44:08,133][26023] Stopping RolloutWorker_w7... +[2024-09-01 16:44:08,141][26023] Loop rollout_proc7_evt_loop terminating... +[2024-09-01 16:44:08,134][00194] Component RolloutWorker_w7 stopped! +[2024-09-01 16:44:08,164][00194] Component RolloutWorker_w0 stopped! +[2024-09-01 16:44:08,181][00194] Component RolloutWorker_w2 stopped! +[2024-09-01 16:44:08,190][26017] Stopping RolloutWorker_w2... +[2024-09-01 16:44:08,173][26016] Stopping RolloutWorker_w0... +[2024-09-01 16:44:08,205][26016] Loop rollout_proc0_evt_loop terminating... +[2024-09-01 16:44:08,208][26017] Loop rollout_proc2_evt_loop terminating... +[2024-09-01 16:44:08,247][00194] Component RolloutWorker_w4 stopped! +[2024-09-01 16:44:08,248][26020] Stopping RolloutWorker_w4... +[2024-09-01 16:44:08,257][26020] Loop rollout_proc4_evt_loop terminating... +[2024-09-01 16:44:08,276][26018] Stopping RolloutWorker_w1... +[2024-09-01 16:44:08,276][00194] Component RolloutWorker_w1 stopped! +[2024-09-01 16:44:08,276][26018] Loop rollout_proc1_evt_loop terminating... +[2024-09-01 16:44:08,333][26022] Stopping RolloutWorker_w6... +[2024-09-01 16:44:08,332][00194] Component RolloutWorker_w6 stopped! +[2024-09-01 16:44:08,335][26022] Loop rollout_proc6_evt_loop terminating... +[2024-09-01 16:44:12,546][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001467_6008832.pth... +[2024-09-01 16:44:12,616][26002] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001422_5824512.pth +[2024-09-01 16:44:12,624][26002] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001467_6008832.pth... +[2024-09-01 16:44:12,714][26002] Stopping LearnerWorker_p0... +[2024-09-01 16:44:12,715][26002] Loop learner_proc0_evt_loop terminating... +[2024-09-01 16:44:12,715][00194] Component LearnerWorker_p0 stopped! +[2024-09-01 16:44:12,718][00194] Waiting for process learner_proc0 to stop... +[2024-09-01 16:44:13,178][00194] Waiting for process inference_proc0-0 to join... +[2024-09-01 16:44:13,183][00194] Waiting for process rollout_proc0 to join... +[2024-09-01 16:44:13,189][00194] Waiting for process rollout_proc1 to join... +[2024-09-01 16:44:13,193][00194] Waiting for process rollout_proc2 to join... +[2024-09-01 16:44:13,199][00194] Waiting for process rollout_proc3 to join... +[2024-09-01 16:44:13,208][00194] Waiting for process rollout_proc4 to join... +[2024-09-01 16:44:13,214][00194] Waiting for process rollout_proc5 to join... +[2024-09-01 16:44:13,220][00194] Waiting for process rollout_proc6 to join... +[2024-09-01 16:44:13,225][00194] Waiting for process rollout_proc7 to join... +[2024-09-01 16:44:13,234][00194] Batcher 0 profile tree view: +batching: 9.2162, releasing_batches: 0.1772 +[2024-09-01 16:44:13,240][00194] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0001 - wait_policy_total: 10.2431 -one_step: 0.0931 - handle_policy_step: 7.0708 - deserialize: 0.1442, stack: 0.0418, obs_to_device_normalize: 0.8121, forward: 5.4789, send_messages: 0.1427 - prepare_outputs: 0.1556 - to_cpu: 0.0223 -[2024-09-01 10:34:23,229][00307] Learner 0 profile tree view: -misc: 0.0000, prepare_batch: 4.7974 -train: 8.9264 - epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0002, kl_divergence: 0.0007, after_optimizer: 0.0078 - calculate_losses: 3.4527 - losses_init: 0.0000, forward_head: 3.1163, bptt_initial: 0.0080, tail: 0.0106, advantages_returns: 0.0028, losses: 0.0030 - bptt: 0.3114 - bptt_forward_core: 0.3103 - update: 5.4628 - clip: 0.0221 -[2024-09-01 10:34:23,235][00307] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.0115, enqueue_policy_requests: 0.1034, env_step: 2.8720, overhead: 0.1024, complete_rollouts: 0.0464 -save_policy_outputs: 0.0846 - split_output_tensors: 0.0340 -[2024-09-01 10:34:23,244][00307] RolloutWorker_w7 profile tree view: -wait_for_trajectories: 0.0249, enqueue_policy_requests: 0.0434, env_step: 1.8572, overhead: 0.0427, complete_rollouts: 0.0754 -save_policy_outputs: 0.0636 - split_output_tensors: 0.0064 -[2024-09-01 10:34:23,252][00307] Loop Runner_EvtLoop terminating... -[2024-09-01 10:34:23,286][00307] Runner profile tree view: -main_loop: 51.2513 -[2024-09-01 10:34:23,294][00307] Collected {0: 12017664}, FPS: 159.8 -[2024-09-01 10:52:55,466][00307] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-01 10:52:55,468][00307] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-01 10:52:55,474][00307] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-01 10:52:55,477][00307] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-01 10:52:55,481][00307] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-01 10:52:55,483][00307] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-01 10:52:55,485][00307] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! -[2024-09-01 10:52:55,490][00307] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-01 10:52:55,491][00307] Adding new argument 'push_to_hub'=True that is not in the saved config file! -[2024-09-01 10:52:55,492][00307] Adding new argument 'hf_repository'='jarski/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! -[2024-09-01 10:52:55,494][00307] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-01 10:52:55,495][00307] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-01 10:52:55,496][00307] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-01 10:52:55,498][00307] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-01 10:52:55,499][00307] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-01 10:52:55,523][00307] RunningMeanStd input shape: (3, 72, 128) -[2024-09-01 10:52:55,525][00307] RunningMeanStd input shape: (1,) -[2024-09-01 10:52:55,546][00307] ConvEncoder: input_channels=3 -[2024-09-01 10:52:55,598][00307] Conv encoder output size: 512 -[2024-09-01 10:52:55,601][00307] Policy head output size: 512 -[2024-09-01 10:52:55,621][00307] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002934_12017664.pth... -[2024-09-01 10:52:56,283][00307] Num frames 100... -[2024-09-01 10:52:56,587][00307] Num frames 200... -[2024-09-01 10:52:56,857][00307] Num frames 300... -[2024-09-01 10:52:57,133][00307] Num frames 400... -[2024-09-01 10:52:57,406][00307] Num frames 500... -[2024-09-01 10:52:57,683][00307] Num frames 600... -[2024-09-01 10:52:57,954][00307] Num frames 700... -[2024-09-01 10:52:58,248][00307] Num frames 800... -[2024-09-01 10:52:58,531][00307] Num frames 900... -[2024-09-01 10:52:58,817][00307] Num frames 1000... -[2024-09-01 10:52:59,088][00307] Num frames 1100... -[2024-09-01 10:52:59,369][00307] Num frames 1200... -[2024-09-01 10:52:59,597][00307] Num frames 1300... -[2024-09-01 10:52:59,823][00307] Avg episode rewards: #0: 37.810, true rewards: #0: 13.810 -[2024-09-01 10:52:59,825][00307] Avg episode reward: 37.810, avg true_objective: 13.810 -[2024-09-01 10:52:59,864][00307] Num frames 1400... -[2024-09-01 10:53:00,085][00307] Num frames 1500... -[2024-09-01 10:53:00,288][00307] Num frames 1600... -[2024-09-01 10:53:00,488][00307] Num frames 1700... -[2024-09-01 10:53:00,686][00307] Num frames 1800... -[2024-09-01 10:53:00,892][00307] Num frames 1900... -[2024-09-01 10:53:01,090][00307] Num frames 2000... -[2024-09-01 10:53:01,300][00307] Num frames 2100... -[2024-09-01 10:53:01,494][00307] Num frames 2200... -[2024-09-01 10:53:01,709][00307] Num frames 2300... -[2024-09-01 10:53:01,929][00307] Num frames 2400... -[2024-09-01 10:53:02,163][00307] Num frames 2500... -[2024-09-01 10:53:02,381][00307] Num frames 2600... -[2024-09-01 10:53:02,591][00307] Num frames 2700... -[2024-09-01 10:53:02,843][00307] Num frames 2800... -[2024-09-01 10:53:03,050][00307] Num frames 2900... -[2024-09-01 10:53:03,252][00307] Num frames 3000... -[2024-09-01 10:53:03,451][00307] Num frames 3100... -[2024-09-01 10:53:03,653][00307] Num frames 3200... -[2024-09-01 10:53:03,865][00307] Num frames 3300... -[2024-09-01 10:53:03,967][00307] Avg episode rewards: #0: 45.089, true rewards: #0: 16.590 -[2024-09-01 10:53:03,969][00307] Avg episode reward: 45.089, avg true_objective: 16.590 -[2024-09-01 10:53:04,125][00307] Num frames 3400... -[2024-09-01 10:53:04,327][00307] Num frames 3500... -[2024-09-01 10:53:04,528][00307] Num frames 3600... -[2024-09-01 10:53:04,661][00307] Avg episode rewards: #0: 31.460, true rewards: #0: 12.127 -[2024-09-01 10:53:04,663][00307] Avg episode reward: 31.460, avg true_objective: 12.127 -[2024-09-01 10:53:04,780][00307] Num frames 3700... -[2024-09-01 10:53:05,006][00307] Num frames 3800... -[2024-09-01 10:53:05,215][00307] Num frames 3900... -[2024-09-01 10:53:05,416][00307] Num frames 4000... -[2024-09-01 10:53:05,618][00307] Num frames 4100... -[2024-09-01 10:53:05,713][00307] Avg episode rewards: #0: 26.045, true rewards: #0: 10.295 -[2024-09-01 10:53:05,716][00307] Avg episode reward: 26.045, avg true_objective: 10.295 -[2024-09-01 10:53:05,879][00307] Num frames 4200... -[2024-09-01 10:53:06,092][00307] Num frames 4300... -[2024-09-01 10:53:06,303][00307] Num frames 4400... -[2024-09-01 10:53:06,502][00307] Num frames 4500... -[2024-09-01 10:53:06,705][00307] Num frames 4600... -[2024-09-01 10:53:06,904][00307] Num frames 4700... -[2024-09-01 10:53:07,118][00307] Num frames 4800... -[2024-09-01 10:53:07,332][00307] Num frames 4900... -[2024-09-01 10:53:07,534][00307] Num frames 5000... -[2024-09-01 10:53:07,728][00307] Num frames 5100... -[2024-09-01 10:53:07,852][00307] Avg episode rewards: #0: 25.470, true rewards: #0: 10.270 -[2024-09-01 10:53:07,853][00307] Avg episode reward: 25.470, avg true_objective: 10.270 -[2024-09-01 10:53:07,985][00307] Num frames 5200... -[2024-09-01 10:53:08,188][00307] Num frames 5300... -[2024-09-01 10:53:08,392][00307] Num frames 5400... -[2024-09-01 10:53:08,586][00307] Num frames 5500... -[2024-09-01 10:53:08,741][00307] Avg episode rewards: #0: 22.252, true rewards: #0: 9.252 -[2024-09-01 10:53:08,744][00307] Avg episode reward: 22.252, avg true_objective: 9.252 -[2024-09-01 10:53:08,844][00307] Num frames 5600... -[2024-09-01 10:53:09,061][00307] Num frames 5700... -[2024-09-01 10:53:09,259][00307] Num frames 5800... -[2024-09-01 10:53:09,397][00307] Avg episode rewards: #0: 19.913, true rewards: #0: 8.341 -[2024-09-01 10:53:09,400][00307] Avg episode reward: 19.913, avg true_objective: 8.341 -[2024-09-01 10:53:09,555][00307] Num frames 5900... -[2024-09-01 10:53:09,838][00307] Num frames 6000... -[2024-09-01 10:53:10,124][00307] Num frames 6100... -[2024-09-01 10:53:10,393][00307] Num frames 6200... -[2024-09-01 10:53:10,661][00307] Num frames 6300... -[2024-09-01 10:53:10,926][00307] Num frames 6400... -[2024-09-01 10:53:11,226][00307] Num frames 6500... -[2024-09-01 10:53:11,501][00307] Num frames 6600... -[2024-09-01 10:53:11,778][00307] Num frames 6700... -[2024-09-01 10:53:12,056][00307] Num frames 6800... -[2024-09-01 10:53:12,357][00307] Num frames 6900... -[2024-09-01 10:53:12,637][00307] Num frames 7000... -[2024-09-01 10:53:12,863][00307] Num frames 7100... -[2024-09-01 10:53:13,026][00307] Avg episode rewards: #0: 21.439, true rewards: #0: 8.939 -[2024-09-01 10:53:13,029][00307] Avg episode reward: 21.439, avg true_objective: 8.939 -[2024-09-01 10:53:13,127][00307] Num frames 7200... -[2024-09-01 10:53:13,345][00307] Num frames 7300... -[2024-09-01 10:53:13,540][00307] Num frames 7400... -[2024-09-01 10:53:13,746][00307] Num frames 7500... -[2024-09-01 10:53:13,952][00307] Num frames 7600... -[2024-09-01 10:53:14,157][00307] Num frames 7700... -[2024-09-01 10:53:14,373][00307] Num frames 7800... -[2024-09-01 10:53:14,583][00307] Num frames 7900... -[2024-09-01 10:53:14,780][00307] Num frames 8000... -[2024-09-01 10:53:14,982][00307] Num frames 8100... -[2024-09-01 10:53:15,186][00307] Num frames 8200... -[2024-09-01 10:53:15,403][00307] Num frames 8300... -[2024-09-01 10:53:15,607][00307] Num frames 8400... -[2024-09-01 10:53:15,726][00307] Avg episode rewards: #0: 22.146, true rewards: #0: 9.368 -[2024-09-01 10:53:15,728][00307] Avg episode reward: 22.146, avg true_objective: 9.368 -[2024-09-01 10:53:15,880][00307] Num frames 8500... -[2024-09-01 10:53:16,088][00307] Num frames 8600... -[2024-09-01 10:53:16,311][00307] Num frames 8700... -[2024-09-01 10:53:16,522][00307] Num frames 8800... -[2024-09-01 10:53:16,719][00307] Num frames 8900... -[2024-09-01 10:53:16,927][00307] Num frames 9000... -[2024-09-01 10:53:17,154][00307] Num frames 9100... -[2024-09-01 10:53:17,379][00307] Num frames 9200... -[2024-09-01 10:53:17,574][00307] Num frames 9300... -[2024-09-01 10:53:17,761][00307] Num frames 9400... -[2024-09-01 10:53:17,960][00307] Num frames 9500... -[2024-09-01 10:53:18,175][00307] Num frames 9600... -[2024-09-01 10:53:18,391][00307] Num frames 9700... -[2024-09-01 10:53:18,595][00307] Num frames 9800... -[2024-09-01 10:53:18,812][00307] Num frames 9900... -[2024-09-01 10:53:19,017][00307] Num frames 10000... -[2024-09-01 10:53:19,226][00307] Num frames 10100... -[2024-09-01 10:53:19,444][00307] Num frames 10200... -[2024-09-01 10:53:19,650][00307] Num frames 10300... -[2024-09-01 10:53:19,850][00307] Num frames 10400... -[2024-09-01 10:53:20,057][00307] Num frames 10500... -[2024-09-01 10:53:20,175][00307] Avg episode rewards: #0: 25.831, true rewards: #0: 10.531 -[2024-09-01 10:53:20,178][00307] Avg episode reward: 25.831, avg true_objective: 10.531 -[2024-09-01 10:54:35,854][00307] Replay video saved to /content/train_dir/default_experiment/replay.mp4! + wait_policy_total: 30.9066 +update_model: 81.0060 + weight_update: 0.1005 +one_step: 0.0376 + handle_policy_step: 1436.1295 + deserialize: 44.8616, stack: 7.2046, obs_to_device_normalize: 241.7567, forward: 1055.2233, send_messages: 32.3865 + prepare_outputs: 17.1816 + to_cpu: 1.7264 +[2024-09-01 16:44:13,242][00194] Learner 0 profile tree view: +misc: 0.0034, prepare_batch: 631.4118 +train: 1567.9798 + epoch_init: 0.0036, minibatch_init: 0.0053, losses_postprocess: 0.0786, kl_divergence: 0.2734, after_optimizer: 1.2242 + calculate_losses: 757.4961 + losses_init: 0.0022, forward_head: 673.4290, bptt_initial: 2.1597, tail: 1.6841, advantages_returns: 0.1136, losses: 0.8179 + bptt: 79.0010 + bptt_forward_core: 78.5114 + update: 808.5816 + clip: 1.8736 +[2024-09-01 16:44:13,244][00194] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.2782, enqueue_policy_requests: 28.3148, env_step: 831.1422, overhead: 20.8807, complete_rollouts: 8.1561 +save_policy_outputs: 22.1874 + split_output_tensors: 7.4809 +[2024-09-01 16:44:13,247][00194] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3237, enqueue_policy_requests: 27.7500, env_step: 817.1254, overhead: 19.3453, complete_rollouts: 9.0290 +save_policy_outputs: 21.3592 + split_output_tensors: 7.1296 +[2024-09-01 16:44:13,251][00194] Loop Runner_EvtLoop terminating... +[2024-09-01 16:44:13,253][00194] Runner profile tree view: +main_loop: 2242.7525 +[2024-09-01 16:44:13,254][00194] Collected {0: 6008832}, FPS: 887.6 +[2024-09-01 16:49:06,149][00194] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-01 16:49:06,153][00194] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-01 16:49:06,156][00194] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-01 16:49:06,159][00194] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-01 16:49:06,162][00194] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-01 16:49:06,165][00194] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-01 16:49:06,167][00194] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-09-01 16:49:06,170][00194] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-01 16:49:06,171][00194] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-09-01 16:49:06,173][00194] Adding new argument 'hf_repository'='jarski/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-09-01 16:49:06,174][00194] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-01 16:49:06,175][00194] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-01 16:49:06,176][00194] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-01 16:49:06,177][00194] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-01 16:49:06,180][00194] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-01 16:49:06,214][00194] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 16:49:06,218][00194] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 16:49:06,223][00194] RunningMeanStd input shape: (1,) +[2024-09-01 16:49:06,266][00194] ConvEncoder: input_channels=3 +[2024-09-01 16:49:06,433][00194] Conv encoder output size: 512 +[2024-09-01 16:49:06,435][00194] Policy head output size: 512 +[2024-09-01 16:49:06,461][00194] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001467_6008832.pth... +[2024-09-01 16:49:07,124][00194] Num frames 100... +[2024-09-01 16:49:07,354][00194] Num frames 200... +[2024-09-01 16:49:07,571][00194] Num frames 300... +[2024-09-01 16:49:07,831][00194] Num frames 400... +[2024-09-01 16:49:08,050][00194] Num frames 500... +[2024-09-01 16:49:08,267][00194] Num frames 600... +[2024-09-01 16:49:08,478][00194] Num frames 700... +[2024-09-01 16:49:08,688][00194] Num frames 800... +[2024-09-01 16:49:08,901][00194] Num frames 900... +[2024-09-01 16:49:09,119][00194] Num frames 1000... +[2024-09-01 16:49:09,328][00194] Avg episode rewards: #0: 23.710, true rewards: #0: 10.710 +[2024-09-01 16:49:09,330][00194] Avg episode reward: 23.710, avg true_objective: 10.710 +[2024-09-01 16:49:09,398][00194] Num frames 1100... +[2024-09-01 16:49:09,614][00194] Num frames 1200... +[2024-09-01 16:49:09,843][00194] Num frames 1300... +[2024-09-01 16:49:10,075][00194] Num frames 1400... +[2024-09-01 16:49:10,305][00194] Num frames 1500... +[2024-09-01 16:49:10,511][00194] Avg episode rewards: #0: 16.355, true rewards: #0: 7.855 +[2024-09-01 16:49:10,513][00194] Avg episode reward: 16.355, avg true_objective: 7.855 +[2024-09-01 16:49:10,583][00194] Num frames 1600... +[2024-09-01 16:49:10,803][00194] Num frames 1700... +[2024-09-01 16:49:11,016][00194] Num frames 1800... +[2024-09-01 16:49:11,226][00194] Num frames 1900... +[2024-09-01 16:49:11,441][00194] Num frames 2000... +[2024-09-01 16:49:11,660][00194] Num frames 2100... +[2024-09-01 16:49:11,879][00194] Num frames 2200... +[2024-09-01 16:49:12,121][00194] Num frames 2300... +[2024-09-01 16:49:12,410][00194] Num frames 2400... +[2024-09-01 16:49:12,701][00194] Num frames 2500... +[2024-09-01 16:49:12,990][00194] Num frames 2600... +[2024-09-01 16:49:13,269][00194] Num frames 2700... +[2024-09-01 16:49:13,547][00194] Num frames 2800... +[2024-09-01 16:49:13,834][00194] Num frames 2900... +[2024-09-01 16:49:14,134][00194] Num frames 3000... +[2024-09-01 16:49:14,217][00194] Avg episode rewards: #0: 23.360, true rewards: #0: 10.027 +[2024-09-01 16:49:14,220][00194] Avg episode reward: 23.360, avg true_objective: 10.027 +[2024-09-01 16:49:14,490][00194] Num frames 3100... +[2024-09-01 16:49:14,772][00194] Num frames 3200... +[2024-09-01 16:49:15,076][00194] Num frames 3300... +[2024-09-01 16:49:15,334][00194] Num frames 3400... +[2024-09-01 16:49:15,572][00194] Avg episode rewards: #0: 19.970, true rewards: #0: 8.720 +[2024-09-01 16:49:15,574][00194] Avg episode reward: 19.970, avg true_objective: 8.720 +[2024-09-01 16:49:15,604][00194] Num frames 3500... +[2024-09-01 16:49:15,809][00194] Num frames 3600... +[2024-09-01 16:49:16,017][00194] Num frames 3700... +[2024-09-01 16:49:16,239][00194] Num frames 3800... +[2024-09-01 16:49:16,451][00194] Num frames 3900... +[2024-09-01 16:49:16,663][00194] Num frames 4000... +[2024-09-01 16:49:16,786][00194] Avg episode rewards: #0: 17.864, true rewards: #0: 8.064 +[2024-09-01 16:49:16,788][00194] Avg episode reward: 17.864, avg true_objective: 8.064 +[2024-09-01 16:49:16,929][00194] Num frames 4100... +[2024-09-01 16:49:17,154][00194] Num frames 4200... +[2024-09-01 16:49:17,358][00194] Num frames 4300... +[2024-09-01 16:49:17,559][00194] Num frames 4400... +[2024-09-01 16:49:17,767][00194] Num frames 4500... +[2024-09-01 16:49:17,925][00194] Avg episode rewards: #0: 16.073, true rewards: #0: 7.573 +[2024-09-01 16:49:17,927][00194] Avg episode reward: 16.073, avg true_objective: 7.573 +[2024-09-01 16:49:18,047][00194] Num frames 4600... +[2024-09-01 16:49:18,282][00194] Num frames 4700... +[2024-09-01 16:49:18,500][00194] Num frames 4800... +[2024-09-01 16:49:18,719][00194] Num frames 4900... +[2024-09-01 16:49:18,929][00194] Num frames 5000... +[2024-09-01 16:49:19,154][00194] Num frames 5100... +[2024-09-01 16:49:19,383][00194] Num frames 5200... +[2024-09-01 16:49:19,621][00194] Num frames 5300... +[2024-09-01 16:49:19,842][00194] Num frames 5400... +[2024-09-01 16:49:20,061][00194] Num frames 5500... +[2024-09-01 16:49:20,293][00194] Num frames 5600... +[2024-09-01 16:49:20,497][00194] Num frames 5700... +[2024-09-01 16:49:20,708][00194] Num frames 5800... +[2024-09-01 16:49:20,915][00194] Num frames 5900... +[2024-09-01 16:49:21,087][00194] Avg episode rewards: #0: 18.931, true rewards: #0: 8.503 +[2024-09-01 16:49:21,091][00194] Avg episode reward: 18.931, avg true_objective: 8.503 +[2024-09-01 16:49:21,191][00194] Num frames 6000... +[2024-09-01 16:49:21,410][00194] Num frames 6100... +[2024-09-01 16:49:21,625][00194] Num frames 6200... +[2024-09-01 16:49:21,831][00194] Num frames 6300... +[2024-09-01 16:49:22,044][00194] Num frames 6400... +[2024-09-01 16:49:22,269][00194] Num frames 6500... +[2024-09-01 16:49:22,490][00194] Num frames 6600... +[2024-09-01 16:49:22,705][00194] Num frames 6700... +[2024-09-01 16:49:22,943][00194] Num frames 6800... +[2024-09-01 16:49:23,176][00194] Num frames 6900... +[2024-09-01 16:49:23,417][00194] Num frames 7000... +[2024-09-01 16:49:23,654][00194] Num frames 7100... +[2024-09-01 16:49:23,871][00194] Num frames 7200... +[2024-09-01 16:49:24,100][00194] Num frames 7300... +[2024-09-01 16:49:24,317][00194] Num frames 7400... +[2024-09-01 16:49:24,542][00194] Num frames 7500... +[2024-09-01 16:49:24,760][00194] Avg episode rewards: #0: 21.964, true rewards: #0: 9.464 +[2024-09-01 16:49:24,762][00194] Avg episode reward: 21.964, avg true_objective: 9.464 +[2024-09-01 16:49:24,832][00194] Num frames 7600... +[2024-09-01 16:49:25,063][00194] Num frames 7700... +[2024-09-01 16:49:25,328][00194] Num frames 7800... +[2024-09-01 16:49:25,643][00194] Num frames 7900... +[2024-09-01 16:49:25,922][00194] Num frames 8000... +[2024-09-01 16:49:26,197][00194] Num frames 8100... +[2024-09-01 16:49:26,486][00194] Num frames 8200... +[2024-09-01 16:49:26,807][00194] Num frames 8300... +[2024-09-01 16:49:27,106][00194] Num frames 8400... +[2024-09-01 16:49:27,399][00194] Num frames 8500... +[2024-09-01 16:49:27,493][00194] Avg episode rewards: #0: 21.791, true rewards: #0: 9.458 +[2024-09-01 16:49:27,497][00194] Avg episode reward: 21.791, avg true_objective: 9.458 +[2024-09-01 16:49:27,755][00194] Num frames 8600... +[2024-09-01 16:49:28,054][00194] Num frames 8700... +[2024-09-01 16:49:28,371][00194] Num frames 8800... +[2024-09-01 16:49:28,609][00194] Num frames 8900... +[2024-09-01 16:49:28,835][00194] Num frames 9000... +[2024-09-01 16:49:28,942][00194] Avg episode rewards: #0: 20.724, true rewards: #0: 9.024 +[2024-09-01 16:49:28,943][00194] Avg episode reward: 20.724, avg true_objective: 9.024 +[2024-09-01 16:50:30,694][00194] Replay video saved to /content/train_dir/default_experiment/replay.mp4!